{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9763241396143519, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 16.2109, "active_queue_size": 16384.0, "cl_loss": 194.1458, "doc_norm": 8.4368, "encoder_q-embeddings": 7975.436, "encoder_q-layer.0": 9281.6211, "encoder_q-layer.1": 7639.3843, "encoder_q-layer.10": 19312.75, "encoder_q-layer.11": 13904.4756, "encoder_q-layer.2": 9548.2881, "encoder_q-layer.3": 9427.0547, "encoder_q-layer.4": 10111.6699, "encoder_q-layer.5": 10282.6211, "encoder_q-layer.6": 12722.626, "encoder_q-layer.7": 15509.1055, "encoder_q-layer.8": 18087.709, "encoder_q-layer.9": 14414.9727, "epoch": 0.0, "inbatch_neg_score": 38.5869, "inbatch_pos_score": 46.8438, "learning_rate": 5.000000000000001e-07, "loss": 194.1458, "norm_diff": 0.2925, "norm_loss": 0.0, "num_token_doc": 66.7563, "num_token_overlap": 14.5878, "num_token_query": 37.2929, "num_token_union": 65.2957, "num_word_context": 202.3315, "num_word_doc": 49.7714, "num_word_query": 27.9059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17262.118, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 38.625, "query_norm": 8.1443, "queue_k_norm": 8.4213, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2929, "sent_len_1": 66.7563, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9963, "stdk": 0.1812, "stdq": 0.1969, "stdqueue_k": 0.1805, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 17.6758, "active_queue_size": 16384.0, "cl_loss": 123.5938, "doc_norm": 8.3452, "encoder_q-embeddings": 2356.886, "encoder_q-layer.0": 2211.4573, "encoder_q-layer.1": 2463.8264, "encoder_q-layer.10": 5617.3389, "encoder_q-layer.11": 5810.0938, "encoder_q-layer.2": 2616.7717, "encoder_q-layer.3": 2847.2046, "encoder_q-layer.4": 3035.8228, "encoder_q-layer.5": 3146.8162, "encoder_q-layer.6": 3590.3196, "encoder_q-layer.7": 3904.9766, "encoder_q-layer.8": 4781.1602, "encoder_q-layer.9": 3973.9607, "epoch": 0.0, "inbatch_neg_score": 36.6611, "inbatch_pos_score": 42.0625, "learning_rate": 1.0000000000000002e-06, "loss": 123.5938, "norm_diff": 0.8913, "norm_loss": 0.0, "num_token_doc": 66.7272, "num_token_overlap": 14.6277, "num_token_query": 37.3192, "num_token_union": 65.2818, "num_word_context": 202.0541, "num_word_doc": 49.7635, "num_word_query": 27.9444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5113.8965, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 36.5938, "query_norm": 7.4539, "queue_k_norm": 8.3644, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3192, "sent_len_1": 66.7272, "sent_len_max_0": 127.9775, "sent_len_max_1": 189.7475, "stdk": 0.1767, "stdq": 0.1521, "stdqueue_k": 0.1788, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 18.457, "active_queue_size": 16384.0, "cl_loss": 71.9692, "doc_norm": 8.2426, "encoder_q-embeddings": 1075.9443, "encoder_q-layer.0": 911.3846, "encoder_q-layer.1": 950.0555, "encoder_q-layer.10": 2228.0586, "encoder_q-layer.11": 4089.199, "encoder_q-layer.2": 1028.134, "encoder_q-layer.3": 1062.2332, "encoder_q-layer.4": 1082.3485, "encoder_q-layer.5": 1106.1101, "encoder_q-layer.6": 1254.2588, "encoder_q-layer.7": 1365.9158, "encoder_q-layer.8": 1613.7227, "encoder_q-layer.9": 1291.2649, "epoch": 0.0, "inbatch_neg_score": 34.7057, "inbatch_pos_score": 38.3438, "learning_rate": 1.5e-06, "loss": 71.9692, "norm_diff": 1.1574, "norm_loss": 0.0, "num_token_doc": 66.8151, "num_token_overlap": 14.537, "num_token_query": 37.1361, "num_token_union": 65.3497, "num_word_context": 202.2068, "num_word_doc": 49.8677, "num_word_query": 27.7694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2752.6612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 34.7188, "query_norm": 7.0852, "queue_k_norm": 8.2391, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1361, "sent_len_1": 66.8151, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1975, "stdk": 0.174, "stdq": 0.1208, "stdqueue_k": 0.1739, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 20.3125, "active_queue_size": 16384.0, "cl_loss": 51.913, "doc_norm": 8.0915, "encoder_q-embeddings": 959.3089, "encoder_q-layer.0": 892.8272, "encoder_q-layer.1": 930.6642, "encoder_q-layer.10": 2557.2468, "encoder_q-layer.11": 4437.4087, "encoder_q-layer.2": 1035.5807, "encoder_q-layer.3": 1111.1182, "encoder_q-layer.4": 1191.9756, "encoder_q-layer.5": 1221.3245, "encoder_q-layer.6": 1352.7523, "encoder_q-layer.7": 1440.7305, "encoder_q-layer.8": 1873.0045, "encoder_q-layer.9": 1545.2184, "epoch": 0.0, "inbatch_neg_score": 33.1311, "inbatch_pos_score": 35.8125, "learning_rate": 2.0000000000000003e-06, "loss": 51.913, "norm_diff": 1.0764, "norm_loss": 0.0, "num_token_doc": 66.868, "num_token_overlap": 14.6431, "num_token_query": 37.3635, "num_token_union": 65.3683, "num_word_context": 202.1151, "num_word_doc": 49.8523, "num_word_query": 27.9857, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2810.1512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 33.0938, "query_norm": 7.0151, "queue_k_norm": 8.095, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3635, "sent_len_1": 66.868, "sent_len_max_0": 128.0, "sent_len_max_1": 191.7713, "stdk": 0.1682, "stdq": 0.1069, "stdqueue_k": 0.1693, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 18.5547, "active_queue_size": 16384.0, "cl_loss": 40.2625, "doc_norm": 7.9193, "encoder_q-embeddings": 714.0179, "encoder_q-layer.0": 629.0656, "encoder_q-layer.1": 715.1001, "encoder_q-layer.10": 2315.1084, "encoder_q-layer.11": 3731.9683, "encoder_q-layer.2": 815.8634, "encoder_q-layer.3": 818.6425, "encoder_q-layer.4": 943.3504, "encoder_q-layer.5": 1024.2067, "encoder_q-layer.6": 1091.0231, "encoder_q-layer.7": 1124.5399, "encoder_q-layer.8": 1470.0569, "encoder_q-layer.9": 1215.7653, "epoch": 0.0, "inbatch_neg_score": 31.8062, "inbatch_pos_score": 33.9688, "learning_rate": 2.5e-06, "loss": 40.2625, "norm_diff": 0.7842, "norm_loss": 0.0, "num_token_doc": 66.7352, "num_token_overlap": 14.5949, "num_token_query": 37.4364, "num_token_union": 65.4111, "num_word_context": 201.8788, "num_word_doc": 49.7935, "num_word_query": 28.0313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2256.0155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 31.7344, "query_norm": 7.1352, "queue_k_norm": 7.942, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4364, "sent_len_1": 66.7352, "sent_len_max_0": 128.0, "sent_len_max_1": 191.655, "stdk": 0.162, "stdq": 0.1001, "stdqueue_k": 0.164, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 20.8008, "active_queue_size": 16384.0, "cl_loss": 32.6603, "doc_norm": 7.7577, "encoder_q-embeddings": 740.3143, "encoder_q-layer.0": 666.7464, "encoder_q-layer.1": 759.3386, "encoder_q-layer.10": 1723.1698, "encoder_q-layer.11": 2766.8584, "encoder_q-layer.2": 807.1533, "encoder_q-layer.3": 876.2121, "encoder_q-layer.4": 963.3276, "encoder_q-layer.5": 1106.2175, "encoder_q-layer.6": 1107.5554, "encoder_q-layer.7": 1160.0038, "encoder_q-layer.8": 1457.7383, "encoder_q-layer.9": 1103.786, "epoch": 0.01, "inbatch_neg_score": 29.8372, "inbatch_pos_score": 31.6719, "learning_rate": 3e-06, "loss": 32.6603, "norm_diff": 0.6275, "norm_loss": 0.0, "num_token_doc": 66.7459, "num_token_overlap": 14.5842, "num_token_query": 37.3564, "num_token_union": 65.3998, "num_word_context": 202.5753, "num_word_doc": 49.8081, "num_word_query": 27.9712, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1941.5335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 29.7656, "query_norm": 7.1302, "queue_k_norm": 7.7787, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3564, "sent_len_1": 66.7459, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2312, "stdk": 0.1571, "stdq": 0.0942, "stdqueue_k": 0.1583, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 18.75, "active_queue_size": 16384.0, "cl_loss": 27.7871, "doc_norm": 7.6021, "encoder_q-embeddings": 1135.8591, "encoder_q-layer.0": 878.1107, "encoder_q-layer.1": 989.9962, "encoder_q-layer.10": 2061.1917, "encoder_q-layer.11": 3003.4368, "encoder_q-layer.2": 1156.0913, "encoder_q-layer.3": 1291.8971, "encoder_q-layer.4": 1522.5391, "encoder_q-layer.5": 1661.561, "encoder_q-layer.6": 1826.6636, "encoder_q-layer.7": 2175.8682, "encoder_q-layer.8": 2283.2649, "encoder_q-layer.9": 1097.3406, "epoch": 0.01, "inbatch_neg_score": 28.1627, "inbatch_pos_score": 29.7031, "learning_rate": 3.5000000000000004e-06, "loss": 27.7871, "norm_diff": 0.735, "norm_loss": 0.0, "num_token_doc": 66.5024, "num_token_overlap": 14.5407, "num_token_query": 37.3707, "num_token_union": 65.2872, "num_word_context": 202.1521, "num_word_doc": 49.6746, "num_word_query": 27.9659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2489.8838, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 28.1094, "query_norm": 6.8671, "queue_k_norm": 7.6175, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3707, "sent_len_1": 66.5024, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.1625, "stdk": 0.1526, "stdq": 0.0888, "stdqueue_k": 0.1529, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 21.7773, "active_queue_size": 16384.0, "cl_loss": 24.5781, "doc_norm": 7.4327, "encoder_q-embeddings": 634.2302, "encoder_q-layer.0": 516.6389, "encoder_q-layer.1": 601.1057, "encoder_q-layer.10": 1479.3937, "encoder_q-layer.11": 2183.8445, "encoder_q-layer.2": 712.654, "encoder_q-layer.3": 767.7959, "encoder_q-layer.4": 877.5681, "encoder_q-layer.5": 988.3104, "encoder_q-layer.6": 1018.4086, "encoder_q-layer.7": 1075.198, "encoder_q-layer.8": 1273.3293, "encoder_q-layer.9": 884.7354, "epoch": 0.01, "inbatch_neg_score": 24.6455, "inbatch_pos_score": 26.0312, "learning_rate": 4.000000000000001e-06, "loss": 24.5781, "norm_diff": 1.2413, "norm_loss": 0.0, "num_token_doc": 66.7126, "num_token_overlap": 14.5613, "num_token_query": 37.2784, "num_token_union": 65.3214, "num_word_context": 202.2671, "num_word_doc": 49.8042, "num_word_query": 27.9055, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1553.2115, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 24.5625, "query_norm": 6.1914, "queue_k_norm": 7.4616, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2784, "sent_len_1": 66.7126, "sent_len_max_0": 128.0, "sent_len_max_1": 188.025, "stdk": 0.145, "stdq": 0.0824, "stdqueue_k": 0.1476, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 21.4844, "active_queue_size": 16384.0, "cl_loss": 20.9434, "doc_norm": 7.3019, "encoder_q-embeddings": 881.8694, "encoder_q-layer.0": 726.0374, "encoder_q-layer.1": 838.314, "encoder_q-layer.10": 1098.1931, "encoder_q-layer.11": 1924.6027, "encoder_q-layer.2": 968.042, "encoder_q-layer.3": 1025.8354, "encoder_q-layer.4": 1127.9121, "encoder_q-layer.5": 1166.5245, "encoder_q-layer.6": 1101.3156, "encoder_q-layer.7": 1052.8994, "encoder_q-layer.8": 993.4003, "encoder_q-layer.9": 582.0045, "epoch": 0.01, "inbatch_neg_score": 18.9267, "inbatch_pos_score": 20.0781, "learning_rate": 4.5e-06, "loss": 20.9434, "norm_diff": 2.1839, "norm_loss": 0.0, "num_token_doc": 66.8776, "num_token_overlap": 14.6403, "num_token_query": 37.5155, "num_token_union": 65.4773, "num_word_context": 202.5508, "num_word_doc": 49.9117, "num_word_query": 28.1031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1566.3371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 18.8906, "query_norm": 5.118, "queue_k_norm": 7.3168, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5155, "sent_len_1": 66.8776, "sent_len_max_0": 128.0, "sent_len_max_1": 188.81, "stdk": 0.141, "stdq": 0.0805, "stdqueue_k": 0.1421, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 19.4336, "active_queue_size": 16384.0, "cl_loss": 17.9932, "doc_norm": 7.1708, "encoder_q-embeddings": 1272.3297, "encoder_q-layer.0": 1273.3102, "encoder_q-layer.1": 1397.6593, "encoder_q-layer.10": 1513.8156, "encoder_q-layer.11": 1947.1127, "encoder_q-layer.2": 1629.0901, "encoder_q-layer.3": 1710.8429, "encoder_q-layer.4": 1734.8322, "encoder_q-layer.5": 1774.405, "encoder_q-layer.6": 1519.7797, "encoder_q-layer.7": 1418.3794, "encoder_q-layer.8": 1263.9177, "encoder_q-layer.9": 608.5713, "epoch": 0.01, "inbatch_neg_score": 13.7603, "inbatch_pos_score": 14.6406, "learning_rate": 5e-06, "loss": 17.9932, "norm_diff": 2.9603, "norm_loss": 0.0, "num_token_doc": 66.777, "num_token_overlap": 14.6329, "num_token_query": 37.4508, "num_token_union": 65.3771, "num_word_context": 202.0964, "num_word_doc": 49.7998, "num_word_query": 28.0377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2192.2577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 13.75, "query_norm": 4.2104, "queue_k_norm": 7.1821, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4508, "sent_len_1": 66.777, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1438, "stdk": 0.1363, "stdq": 0.0819, "stdqueue_k": 0.1364, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 20.7031, "active_queue_size": 16384.0, "cl_loss": 15.9719, "doc_norm": 7.064, "encoder_q-embeddings": 3765.342, "encoder_q-layer.0": 3666.5959, "encoder_q-layer.1": 4113.0835, "encoder_q-layer.10": 1876.502, "encoder_q-layer.11": 2829.7505, "encoder_q-layer.2": 3899.1147, "encoder_q-layer.3": 4075.6855, "encoder_q-layer.4": 3956.8293, "encoder_q-layer.5": 1955.6318, "encoder_q-layer.6": 1739.269, "encoder_q-layer.7": 1394.9734, "encoder_q-layer.8": 1300.2362, "encoder_q-layer.9": 844.3778, "epoch": 0.01, "inbatch_neg_score": 10.1589, "inbatch_pos_score": 10.9375, "learning_rate": 5.500000000000001e-06, "loss": 15.9719, "norm_diff": 3.5782, "norm_loss": 0.0, "num_token_doc": 66.8266, "num_token_overlap": 14.5806, "num_token_query": 37.2245, "num_token_union": 65.3747, "num_word_context": 202.2569, "num_word_doc": 49.8782, "num_word_query": 27.8779, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4494.4476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 10.1484, "query_norm": 3.4857, "queue_k_norm": 7.06, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2245, "sent_len_1": 66.8266, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4638, "stdk": 0.1318, "stdq": 0.0782, "stdqueue_k": 0.1315, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 21.875, "active_queue_size": 16384.0, "cl_loss": 14.8374, "doc_norm": 6.9479, "encoder_q-embeddings": 1429.254, "encoder_q-layer.0": 1363.314, "encoder_q-layer.1": 1545.1763, "encoder_q-layer.10": 901.1591, "encoder_q-layer.11": 1557.3397, "encoder_q-layer.2": 1720.5312, "encoder_q-layer.3": 2022.7922, "encoder_q-layer.4": 2246.4329, "encoder_q-layer.5": 2506.8962, "encoder_q-layer.6": 2350.0493, "encoder_q-layer.7": 1826.559, "encoder_q-layer.8": 1839.2855, "encoder_q-layer.9": 518.8706, "epoch": 0.01, "inbatch_neg_score": 7.5966, "inbatch_pos_score": 8.2969, "learning_rate": 6e-06, "loss": 14.8374, "norm_diff": 4.0159, "norm_loss": 0.0, "num_token_doc": 66.8029, "num_token_overlap": 14.5366, "num_token_query": 37.1217, "num_token_union": 65.3341, "num_word_context": 201.7482, "num_word_doc": 49.836, "num_word_query": 27.7949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2569.9801, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 7.5781, "query_norm": 2.9319, "queue_k_norm": 6.9515, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1217, "sent_len_1": 66.8029, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.9475, "stdk": 0.1256, "stdq": 0.0699, "stdqueue_k": 0.1264, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 22.9492, "active_queue_size": 16384.0, "cl_loss": 13.402, "doc_norm": 6.8461, "encoder_q-embeddings": 2282.9417, "encoder_q-layer.0": 2063.5476, "encoder_q-layer.1": 2554.3679, "encoder_q-layer.10": 921.2824, "encoder_q-layer.11": 1562.6543, "encoder_q-layer.2": 2747.0603, "encoder_q-layer.3": 2591.4104, "encoder_q-layer.4": 2603.5317, "encoder_q-layer.5": 2318.8818, "encoder_q-layer.6": 2193.594, "encoder_q-layer.7": 1774.0529, "encoder_q-layer.8": 1507.6467, "encoder_q-layer.9": 415.4595, "epoch": 0.01, "inbatch_neg_score": 4.1833, "inbatch_pos_score": 4.8203, "learning_rate": 6.5000000000000004e-06, "loss": 13.402, "norm_diff": 4.3734, "norm_loss": 0.0, "num_token_doc": 66.7197, "num_token_overlap": 14.6293, "num_token_query": 37.4039, "num_token_union": 65.3365, "num_word_context": 202.6411, "num_word_doc": 49.7897, "num_word_query": 28.019, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3121.2615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 4.1797, "query_norm": 2.4727, "queue_k_norm": 6.8556, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4039, "sent_len_1": 66.7197, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2488, "stdk": 0.1216, "stdq": 0.066, "stdqueue_k": 0.1215, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 21.875, "active_queue_size": 16384.0, "cl_loss": 12.4718, "doc_norm": 6.7572, "encoder_q-embeddings": 930.9974, "encoder_q-layer.0": 761.2678, "encoder_q-layer.1": 857.5677, "encoder_q-layer.10": 1060.9839, "encoder_q-layer.11": 1680.1694, "encoder_q-layer.2": 903.915, "encoder_q-layer.3": 892.1885, "encoder_q-layer.4": 947.2903, "encoder_q-layer.5": 934.0374, "encoder_q-layer.6": 1002.4002, "encoder_q-layer.7": 858.4871, "encoder_q-layer.8": 928.276, "encoder_q-layer.9": 438.4003, "epoch": 0.01, "inbatch_neg_score": 3.4042, "inbatch_pos_score": 4.0, "learning_rate": 7.000000000000001e-06, "loss": 12.4718, "norm_diff": 4.4465, "norm_loss": 0.0, "num_token_doc": 67.0675, "num_token_overlap": 14.6825, "num_token_query": 37.4611, "num_token_union": 65.5365, "num_word_context": 202.15, "num_word_doc": 50.087, "num_word_query": 28.0416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1390.6216, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 3.3906, "query_norm": 2.3107, "queue_k_norm": 6.7614, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4611, "sent_len_1": 67.0675, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.525, "stdk": 0.1156, "stdq": 0.0631, "stdqueue_k": 0.1167, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 24.6094, "active_queue_size": 16384.0, "cl_loss": 11.9936, "doc_norm": 6.6689, "encoder_q-embeddings": 1094.4907, "encoder_q-layer.0": 1044.4388, "encoder_q-layer.1": 1236.5601, "encoder_q-layer.10": 1287.1862, "encoder_q-layer.11": 2106.4812, "encoder_q-layer.2": 1262.2346, "encoder_q-layer.3": 1209.079, "encoder_q-layer.4": 1132.9655, "encoder_q-layer.5": 1058.5283, "encoder_q-layer.6": 966.4668, "encoder_q-layer.7": 850.8129, "encoder_q-layer.8": 804.4186, "encoder_q-layer.9": 578.23, "epoch": 0.01, "inbatch_neg_score": 4.3288, "inbatch_pos_score": 4.9297, "learning_rate": 7.5e-06, "loss": 11.9936, "norm_diff": 4.341, "norm_loss": 0.0, "num_token_doc": 66.6088, "num_token_overlap": 14.5443, "num_token_query": 37.2471, "num_token_union": 65.2981, "num_word_context": 202.2152, "num_word_doc": 49.664, "num_word_query": 27.8431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1678.2943, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 4.3125, "query_norm": 2.3278, "queue_k_norm": 6.6811, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2471, "sent_len_1": 66.6088, "sent_len_max_0": 128.0, "sent_len_max_1": 190.595, "stdk": 0.1112, "stdq": 0.0624, "stdqueue_k": 0.1126, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 21.6797, "active_queue_size": 16384.0, "cl_loss": 11.6802, "doc_norm": 6.599, "encoder_q-embeddings": 772.2969, "encoder_q-layer.0": 654.7974, "encoder_q-layer.1": 774.2084, "encoder_q-layer.10": 530.0367, "encoder_q-layer.11": 1179.4899, "encoder_q-layer.2": 901.6348, "encoder_q-layer.3": 873.1348, "encoder_q-layer.4": 870.7625, "encoder_q-layer.5": 858.5498, "encoder_q-layer.6": 665.9461, "encoder_q-layer.7": 538.7513, "encoder_q-layer.8": 542.8779, "encoder_q-layer.9": 318.2919, "epoch": 0.02, "inbatch_neg_score": 3.229, "inbatch_pos_score": 3.8125, "learning_rate": 8.000000000000001e-06, "loss": 11.6802, "norm_diff": 4.3691, "norm_loss": 0.0, "num_token_doc": 66.5728, "num_token_overlap": 14.5792, "num_token_query": 37.2431, "num_token_union": 65.2555, "num_word_context": 201.8937, "num_word_doc": 49.7037, "num_word_query": 27.906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1118.5646, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 3.2188, "query_norm": 2.2299, "queue_k_norm": 6.5982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2431, "sent_len_1": 66.5728, "sent_len_max_0": 128.0, "sent_len_max_1": 188.745, "stdk": 0.1074, "stdq": 0.0614, "stdqueue_k": 0.108, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 24.5117, "active_queue_size": 16384.0, "cl_loss": 11.508, "doc_norm": 6.5112, "encoder_q-embeddings": 1759.2312, "encoder_q-layer.0": 1480.2734, "encoder_q-layer.1": 1521.5992, "encoder_q-layer.10": 683.5996, "encoder_q-layer.11": 1303.4663, "encoder_q-layer.2": 1511.4972, "encoder_q-layer.3": 1348.5183, "encoder_q-layer.4": 1296.4844, "encoder_q-layer.5": 1152.5134, "encoder_q-layer.6": 1053.5812, "encoder_q-layer.7": 1018.7279, "encoder_q-layer.8": 1082.3263, "encoder_q-layer.9": 422.7108, "epoch": 0.02, "inbatch_neg_score": 3.452, "inbatch_pos_score": 4.0391, "learning_rate": 8.500000000000002e-06, "loss": 11.508, "norm_diff": 4.2865, "norm_loss": 0.0, "num_token_doc": 66.5537, "num_token_overlap": 14.5427, "num_token_query": 37.1838, "num_token_union": 65.1488, "num_word_context": 201.7533, "num_word_doc": 49.6678, "num_word_query": 27.8249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1896.2649, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 3.4414, "query_norm": 2.2246, "queue_k_norm": 6.5214, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1838, "sent_len_1": 66.5537, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.59, "stdk": 0.103, "stdq": 0.0605, "stdqueue_k": 0.1037, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 21.1914, "active_queue_size": 16384.0, "cl_loss": 11.3694, "doc_norm": 6.4182, "encoder_q-embeddings": 1680.0654, "encoder_q-layer.0": 1440.5753, "encoder_q-layer.1": 1736.5789, "encoder_q-layer.10": 757.0913, "encoder_q-layer.11": 1566.2153, "encoder_q-layer.2": 1973.9683, "encoder_q-layer.3": 2045.5223, "encoder_q-layer.4": 2373.5515, "encoder_q-layer.5": 2570.0583, "encoder_q-layer.6": 2293.6648, "encoder_q-layer.7": 1837.9972, "encoder_q-layer.8": 1699.0508, "encoder_q-layer.9": 487.3298, "epoch": 0.02, "inbatch_neg_score": 2.5779, "inbatch_pos_score": 3.1172, "learning_rate": 9e-06, "loss": 11.3694, "norm_diff": 4.1227, "norm_loss": 0.0, "num_token_doc": 66.8627, "num_token_overlap": 14.5923, "num_token_query": 37.3561, "num_token_union": 65.4834, "num_word_context": 202.5795, "num_word_doc": 49.9029, "num_word_query": 27.9219, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2680.4534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.5801, "query_norm": 2.2955, "queue_k_norm": 6.4358, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3561, "sent_len_1": 66.8627, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.7388, "stdk": 0.0994, "stdq": 0.0633, "stdqueue_k": 0.0996, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 21.7773, "active_queue_size": 16384.0, "cl_loss": 11.1851, "doc_norm": 6.3246, "encoder_q-embeddings": 1985.7189, "encoder_q-layer.0": 1592.2303, "encoder_q-layer.1": 1648.5552, "encoder_q-layer.10": 1081.6512, "encoder_q-layer.11": 1713.588, "encoder_q-layer.2": 1847.016, "encoder_q-layer.3": 1660.3925, "encoder_q-layer.4": 1622.6313, "encoder_q-layer.5": 1506.5111, "encoder_q-layer.6": 1612.6376, "encoder_q-layer.7": 1833.2396, "encoder_q-layer.8": 2034.0382, "encoder_q-layer.9": 897.881, "epoch": 0.02, "inbatch_neg_score": 1.087, "inbatch_pos_score": 1.6318, "learning_rate": 9.5e-06, "loss": 11.1851, "norm_diff": 3.9859, "norm_loss": 0.0, "num_token_doc": 66.5972, "num_token_overlap": 14.5442, "num_token_query": 37.2651, "num_token_union": 65.3091, "num_word_context": 202.3052, "num_word_doc": 49.6955, "num_word_query": 27.8959, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2437.7021, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0801, "query_norm": 2.3387, "queue_k_norm": 6.3399, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2651, "sent_len_1": 66.5972, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.36, "stdk": 0.095, "stdq": 0.0674, "stdqueue_k": 0.0958, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 21.2891, "active_queue_size": 16384.0, "cl_loss": 10.9935, "doc_norm": 6.2253, "encoder_q-embeddings": 1304.9418, "encoder_q-layer.0": 1105.6769, "encoder_q-layer.1": 1372.2976, "encoder_q-layer.10": 738.1063, "encoder_q-layer.11": 1322.8344, "encoder_q-layer.2": 1515.002, "encoder_q-layer.3": 1448.8467, "encoder_q-layer.4": 1467.8999, "encoder_q-layer.5": 1527.6956, "encoder_q-layer.6": 1490.7589, "encoder_q-layer.7": 1604.2759, "encoder_q-layer.8": 1861.2701, "encoder_q-layer.9": 860.9147, "epoch": 0.02, "inbatch_neg_score": 2.4201, "inbatch_pos_score": 2.9707, "learning_rate": 1e-05, "loss": 10.9935, "norm_diff": 3.8085, "norm_loss": 0.0, "num_token_doc": 66.6548, "num_token_overlap": 14.5926, "num_token_query": 37.3197, "num_token_union": 65.2709, "num_word_context": 202.119, "num_word_doc": 49.7585, "num_word_query": 27.9313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2027.0419, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.4023, "query_norm": 2.4167, "queue_k_norm": 6.2337, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3197, "sent_len_1": 66.6548, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.0375, "stdk": 0.0907, "stdq": 0.0665, "stdqueue_k": 0.0922, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 21.2891, "active_queue_size": 16384.0, "cl_loss": 10.8318, "doc_norm": 6.0859, "encoder_q-embeddings": 1545.9772, "encoder_q-layer.0": 1317.1818, "encoder_q-layer.1": 1436.6669, "encoder_q-layer.10": 964.449, "encoder_q-layer.11": 1978.9459, "encoder_q-layer.2": 1776.2677, "encoder_q-layer.3": 1734.8514, "encoder_q-layer.4": 1670.9894, "encoder_q-layer.5": 1872.7405, "encoder_q-layer.6": 1637.4487, "encoder_q-layer.7": 1480.2177, "encoder_q-layer.8": 1400.578, "encoder_q-layer.9": 750.5076, "epoch": 0.02, "inbatch_neg_score": 2.4362, "inbatch_pos_score": 2.9414, "learning_rate": 1.05e-05, "loss": 10.8318, "norm_diff": 3.551, "norm_loss": 0.0, "num_token_doc": 66.701, "num_token_overlap": 14.4679, "num_token_query": 37.0008, "num_token_union": 65.222, "num_word_context": 202.367, "num_word_doc": 49.8295, "num_word_query": 27.6825, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2238.7261, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.4238, "query_norm": 2.535, "queue_k_norm": 6.106, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.0008, "sent_len_1": 66.701, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9325, "stdk": 0.0885, "stdq": 0.0714, "stdqueue_k": 0.0888, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 22.4609, "active_queue_size": 16384.0, "cl_loss": 10.3749, "doc_norm": 5.9367, "encoder_q-embeddings": 1685.9889, "encoder_q-layer.0": 1557.238, "encoder_q-layer.1": 1758.8346, "encoder_q-layer.10": 3445.6995, "encoder_q-layer.11": 4259.0601, "encoder_q-layer.2": 1745.8733, "encoder_q-layer.3": 1676.4348, "encoder_q-layer.4": 1621.1475, "encoder_q-layer.5": 1488.979, "encoder_q-layer.6": 1319.3221, "encoder_q-layer.7": 1271.4083, "encoder_q-layer.8": 1683.8792, "encoder_q-layer.9": 1903.528, "epoch": 0.02, "inbatch_neg_score": 2.1153, "inbatch_pos_score": 2.6191, "learning_rate": 1.1000000000000001e-05, "loss": 10.3749, "norm_diff": 3.3645, "norm_loss": 0.0, "num_token_doc": 66.5805, "num_token_overlap": 14.6519, "num_token_query": 37.3458, "num_token_union": 65.2225, "num_word_context": 201.8809, "num_word_doc": 49.675, "num_word_query": 27.9654, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2876.2168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.0957, "query_norm": 2.5722, "queue_k_norm": 5.9542, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3458, "sent_len_1": 66.5805, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.8587, "stdk": 0.0852, "stdq": 0.0741, "stdqueue_k": 0.0855, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 21.1914, "active_queue_size": 16384.0, "cl_loss": 9.8584, "doc_norm": 5.7554, "encoder_q-embeddings": 3058.7014, "encoder_q-layer.0": 2407.8513, "encoder_q-layer.1": 2999.6814, "encoder_q-layer.10": 6180.8423, "encoder_q-layer.11": 6425.668, "encoder_q-layer.2": 3465.0366, "encoder_q-layer.3": 3536.8606, "encoder_q-layer.4": 4025.4819, "encoder_q-layer.5": 4410.0625, "encoder_q-layer.6": 4699.3218, "encoder_q-layer.7": 5534.3828, "encoder_q-layer.8": 5304.3271, "encoder_q-layer.9": 4597.3687, "epoch": 0.02, "inbatch_neg_score": 1.7123, "inbatch_pos_score": 2.209, "learning_rate": 1.1500000000000002e-05, "loss": 9.8584, "norm_diff": 3.1692, "norm_loss": 0.0, "num_token_doc": 66.6916, "num_token_overlap": 14.5316, "num_token_query": 37.3444, "num_token_union": 65.4029, "num_word_context": 202.026, "num_word_doc": 49.7609, "num_word_query": 27.9536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6175.2021, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.6943, "query_norm": 2.5862, "queue_k_norm": 5.7695, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3444, "sent_len_1": 66.6916, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.0488, "stdk": 0.0815, "stdq": 0.0764, "stdqueue_k": 0.0823, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 25.7812, "active_queue_size": 16384.0, "cl_loss": 9.5524, "doc_norm": 5.5652, "encoder_q-embeddings": 822.1486, "encoder_q-layer.0": 725.1597, "encoder_q-layer.1": 801.4902, "encoder_q-layer.10": 2609.3743, "encoder_q-layer.11": 2927.0002, "encoder_q-layer.2": 952.1415, "encoder_q-layer.3": 999.7825, "encoder_q-layer.4": 1067.1525, "encoder_q-layer.5": 1215.0919, "encoder_q-layer.6": 1444.9402, "encoder_q-layer.7": 1656.4503, "encoder_q-layer.8": 1707.3444, "encoder_q-layer.9": 1778.9966, "epoch": 0.02, "inbatch_neg_score": 1.7835, "inbatch_pos_score": 2.3145, "learning_rate": 1.2e-05, "loss": 9.5524, "norm_diff": 2.9417, "norm_loss": 0.0, "num_token_doc": 66.8803, "num_token_overlap": 14.6005, "num_token_query": 37.2978, "num_token_union": 65.3615, "num_word_context": 202.3044, "num_word_doc": 49.8678, "num_word_query": 27.9339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2076.4648, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.7646, "query_norm": 2.6235, "queue_k_norm": 5.5795, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2978, "sent_len_1": 66.8803, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5563, "stdk": 0.0791, "stdq": 0.0769, "stdqueue_k": 0.0797, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 21.3867, "active_queue_size": 16384.0, "cl_loss": 9.2459, "doc_norm": 5.3422, "encoder_q-embeddings": 796.2928, "encoder_q-layer.0": 712.8962, "encoder_q-layer.1": 872.9378, "encoder_q-layer.10": 5533.5923, "encoder_q-layer.11": 5240.3408, "encoder_q-layer.2": 1094.1665, "encoder_q-layer.3": 1289.7074, "encoder_q-layer.4": 1715.0511, "encoder_q-layer.5": 2226.7837, "encoder_q-layer.6": 3249.3191, "encoder_q-layer.7": 3942.6479, "encoder_q-layer.8": 4225.4551, "encoder_q-layer.9": 4296.8672, "epoch": 0.02, "inbatch_neg_score": 1.5042, "inbatch_pos_score": 1.9883, "learning_rate": 1.25e-05, "loss": 9.2459, "norm_diff": 2.7873, "norm_loss": 0.0, "num_token_doc": 66.7698, "num_token_overlap": 14.5696, "num_token_query": 37.4296, "num_token_union": 65.4542, "num_word_context": 202.3481, "num_word_doc": 49.7896, "num_word_query": 28.0183, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4146.4005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4893, "query_norm": 2.5549, "queue_k_norm": 5.3601, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4296, "sent_len_1": 66.7698, "sent_len_max_0": 128.0, "sent_len_max_1": 190.375, "stdk": 0.0768, "stdq": 0.0755, "stdqueue_k": 0.0771, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 23.9258, "active_queue_size": 16384.0, "cl_loss": 8.8762, "doc_norm": 5.1165, "encoder_q-embeddings": 666.4819, "encoder_q-layer.0": 550.5584, "encoder_q-layer.1": 543.8118, "encoder_q-layer.10": 1838.1223, "encoder_q-layer.11": 2182.5005, "encoder_q-layer.2": 585.9784, "encoder_q-layer.3": 570.8592, "encoder_q-layer.4": 638.8281, "encoder_q-layer.5": 747.4945, "encoder_q-layer.6": 1069.0946, "encoder_q-layer.7": 1170.6611, "encoder_q-layer.8": 1259.2987, "encoder_q-layer.9": 1240.0641, "epoch": 0.03, "inbatch_neg_score": 0.9361, "inbatch_pos_score": 1.4443, "learning_rate": 1.3000000000000001e-05, "loss": 8.8762, "norm_diff": 2.6536, "norm_loss": 0.0, "num_token_doc": 66.6715, "num_token_overlap": 14.6183, "num_token_query": 37.2733, "num_token_union": 65.2785, "num_word_context": 202.3524, "num_word_doc": 49.7764, "num_word_query": 27.8905, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1489.7209, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9238, "query_norm": 2.4628, "queue_k_norm": 5.1372, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2733, "sent_len_1": 66.6715, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.7738, "stdk": 0.0746, "stdq": 0.0744, "stdqueue_k": 0.0747, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 26.2695, "active_queue_size": 16384.0, "cl_loss": 8.5228, "doc_norm": 4.8842, "encoder_q-embeddings": 565.7827, "encoder_q-layer.0": 523.8008, "encoder_q-layer.1": 678.0887, "encoder_q-layer.10": 4239.6289, "encoder_q-layer.11": 3656.8752, "encoder_q-layer.2": 863.7756, "encoder_q-layer.3": 1030.641, "encoder_q-layer.4": 1377.4574, "encoder_q-layer.5": 1757.9523, "encoder_q-layer.6": 2536.0408, "encoder_q-layer.7": 3157.2817, "encoder_q-layer.8": 3524.2913, "encoder_q-layer.9": 3764.7051, "epoch": 0.03, "inbatch_neg_score": 0.5611, "inbatch_pos_score": 1.0781, "learning_rate": 1.3500000000000001e-05, "loss": 8.5228, "norm_diff": 2.4646, "norm_loss": 0.0, "num_token_doc": 66.7982, "num_token_overlap": 14.6522, "num_token_query": 37.4032, "num_token_union": 65.3748, "num_word_context": 202.5186, "num_word_doc": 49.8307, "num_word_query": 28.0132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3291.8448, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5439, "query_norm": 2.4196, "queue_k_norm": 4.8915, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4032, "sent_len_1": 66.7982, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1587, "stdk": 0.0716, "stdq": 0.072, "stdqueue_k": 0.0727, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 22.7539, "active_queue_size": 16384.0, "cl_loss": 8.3923, "doc_norm": 4.6194, "encoder_q-embeddings": 729.5176, "encoder_q-layer.0": 615.1727, "encoder_q-layer.1": 839.8428, "encoder_q-layer.10": 6921.9941, "encoder_q-layer.11": 6208.0439, "encoder_q-layer.2": 1125.8972, "encoder_q-layer.3": 1403.8032, "encoder_q-layer.4": 1983.9309, "encoder_q-layer.5": 2777.3411, "encoder_q-layer.6": 4035.3401, "encoder_q-layer.7": 5129.0396, "encoder_q-layer.8": 5691.166, "encoder_q-layer.9": 6013.1348, "epoch": 0.03, "inbatch_neg_score": 1.0487, "inbatch_pos_score": 1.5439, "learning_rate": 1.4000000000000001e-05, "loss": 8.3923, "norm_diff": 2.1752, "norm_loss": 0.0, "num_token_doc": 66.7958, "num_token_overlap": 14.5669, "num_token_query": 37.221, "num_token_union": 65.3438, "num_word_context": 202.0651, "num_word_doc": 49.8596, "num_word_query": 27.8292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5260.7368, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.041, "query_norm": 2.4443, "queue_k_norm": 4.6596, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.221, "sent_len_1": 66.7958, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3063, "stdk": 0.0706, "stdq": 0.0734, "stdqueue_k": 0.0708, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 21.4844, "active_queue_size": 16384.0, "cl_loss": 8.2074, "doc_norm": 4.3898, "encoder_q-embeddings": 1091.1931, "encoder_q-layer.0": 960.0571, "encoder_q-layer.1": 1235.2124, "encoder_q-layer.10": 11146.6387, "encoder_q-layer.11": 9313.5381, "encoder_q-layer.2": 1677.1356, "encoder_q-layer.3": 2082.3298, "encoder_q-layer.4": 3131.8528, "encoder_q-layer.5": 4398.5137, "encoder_q-layer.6": 6569.8716, "encoder_q-layer.7": 8518.7559, "encoder_q-layer.8": 9355.6211, "encoder_q-layer.9": 9756.8691, "epoch": 0.03, "inbatch_neg_score": 0.1962, "inbatch_pos_score": 0.6812, "learning_rate": 1.45e-05, "loss": 8.2074, "norm_diff": 2.0777, "norm_loss": 0.0, "num_token_doc": 66.8577, "num_token_overlap": 14.5663, "num_token_query": 37.301, "num_token_union": 65.3845, "num_word_context": 201.9603, "num_word_doc": 49.8792, "num_word_query": 27.9488, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8528.4983, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1848, "query_norm": 2.3121, "queue_k_norm": 4.4197, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.301, "sent_len_1": 66.8577, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3288, "stdk": 0.069, "stdq": 0.0697, "stdqueue_k": 0.0691, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 24.5117, "active_queue_size": 16384.0, "cl_loss": 7.8059, "doc_norm": 4.1955, "encoder_q-embeddings": 572.1253, "encoder_q-layer.0": 516.9034, "encoder_q-layer.1": 640.162, "encoder_q-layer.10": 6289.8486, "encoder_q-layer.11": 5026.3398, "encoder_q-layer.2": 827.2342, "encoder_q-layer.3": 1028.2043, "encoder_q-layer.4": 1489.8403, "encoder_q-layer.5": 2088.5908, "encoder_q-layer.6": 3247.5913, "encoder_q-layer.7": 4226.2178, "encoder_q-layer.8": 4663.1929, "encoder_q-layer.9": 5056.1143, "epoch": 0.03, "inbatch_neg_score": 0.3978, "inbatch_pos_score": 0.8789, "learning_rate": 1.5e-05, "loss": 7.8059, "norm_diff": 1.9294, "norm_loss": 0.0, "num_token_doc": 66.6056, "num_token_overlap": 14.5606, "num_token_query": 37.266, "num_token_union": 65.2879, "num_word_context": 202.4115, "num_word_doc": 49.7574, "num_word_query": 27.9014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4413.9515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3892, "query_norm": 2.2661, "queue_k_norm": 4.2015, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.266, "sent_len_1": 66.6056, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.7363, "stdk": 0.0675, "stdq": 0.0669, "stdqueue_k": 0.0676, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 27.7344, "active_queue_size": 16384.0, "cl_loss": 7.5286, "doc_norm": 3.9867, "encoder_q-embeddings": 499.3928, "encoder_q-layer.0": 436.995, "encoder_q-layer.1": 481.1474, "encoder_q-layer.10": 2391.1064, "encoder_q-layer.11": 2098.5164, "encoder_q-layer.2": 543.7391, "encoder_q-layer.3": 541.0486, "encoder_q-layer.4": 633.7415, "encoder_q-layer.5": 779.6633, "encoder_q-layer.6": 1157.4857, "encoder_q-layer.7": 1545.0651, "encoder_q-layer.8": 1812.1573, "encoder_q-layer.9": 1949.0735, "epoch": 0.03, "inbatch_neg_score": 0.5345, "inbatch_pos_score": 1.0059, "learning_rate": 1.55e-05, "loss": 7.5286, "norm_diff": 1.7918, "norm_loss": 0.0, "num_token_doc": 66.6193, "num_token_overlap": 14.4978, "num_token_query": 37.1821, "num_token_union": 65.3073, "num_word_context": 202.4798, "num_word_doc": 49.7435, "num_word_query": 27.8405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1741.638, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5254, "query_norm": 2.1948, "queue_k_norm": 4.0006, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1821, "sent_len_1": 66.6193, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.0625, "stdk": 0.0658, "stdq": 0.064, "stdqueue_k": 0.0661, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 28.3203, "active_queue_size": 16384.0, "cl_loss": 7.2507, "doc_norm": 3.8091, "encoder_q-embeddings": 664.858, "encoder_q-layer.0": 553.9355, "encoder_q-layer.1": 548.5521, "encoder_q-layer.10": 1790.8271, "encoder_q-layer.11": 1727.1355, "encoder_q-layer.2": 578.7946, "encoder_q-layer.3": 519.2296, "encoder_q-layer.4": 566.6929, "encoder_q-layer.5": 647.1762, "encoder_q-layer.6": 779.0713, "encoder_q-layer.7": 942.8659, "encoder_q-layer.8": 1145.0706, "encoder_q-layer.9": 1358.2365, "epoch": 0.03, "inbatch_neg_score": 0.649, "inbatch_pos_score": 1.1357, "learning_rate": 1.6000000000000003e-05, "loss": 7.2507, "norm_diff": 1.6872, "norm_loss": 0.0, "num_token_doc": 66.6687, "num_token_overlap": 14.5406, "num_token_query": 37.2466, "num_token_union": 65.3184, "num_word_context": 202.2039, "num_word_doc": 49.7638, "num_word_query": 27.9066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1338.6162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6396, "query_norm": 2.1219, "queue_k_norm": 3.8126, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2466, "sent_len_1": 66.6687, "sent_len_max_0": 127.9862, "sent_len_max_1": 187.19, "stdk": 0.0646, "stdq": 0.0625, "stdqueue_k": 0.0651, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 28.2227, "active_queue_size": 16384.0, "cl_loss": 6.94, "doc_norm": 3.6436, "encoder_q-embeddings": 593.4529, "encoder_q-layer.0": 532.6782, "encoder_q-layer.1": 614.9348, "encoder_q-layer.10": 2488.5225, "encoder_q-layer.11": 2150.0149, "encoder_q-layer.2": 684.9964, "encoder_q-layer.3": 674.1537, "encoder_q-layer.4": 767.0406, "encoder_q-layer.5": 968.5757, "encoder_q-layer.6": 1410.4574, "encoder_q-layer.7": 1872.2633, "encoder_q-layer.8": 2173.1416, "encoder_q-layer.9": 2216.187, "epoch": 0.03, "inbatch_neg_score": 0.4821, "inbatch_pos_score": 0.9761, "learning_rate": 1.65e-05, "loss": 6.94, "norm_diff": 1.5798, "norm_loss": 0.0, "num_token_doc": 66.9942, "num_token_overlap": 14.574, "num_token_query": 37.2961, "num_token_union": 65.4773, "num_word_context": 202.6073, "num_word_doc": 49.9887, "num_word_query": 27.937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1990.239, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4775, "query_norm": 2.0638, "queue_k_norm": 3.6493, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2961, "sent_len_1": 66.9942, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1475, "stdk": 0.0639, "stdq": 0.0607, "stdqueue_k": 0.0639, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 27.5391, "active_queue_size": 16384.0, "cl_loss": 6.7638, "doc_norm": 3.4648, "encoder_q-embeddings": 642.0357, "encoder_q-layer.0": 571.8625, "encoder_q-layer.1": 644.6965, "encoder_q-layer.10": 3724.6343, "encoder_q-layer.11": 3110.1946, "encoder_q-layer.2": 708.2228, "encoder_q-layer.3": 783.7878, "encoder_q-layer.4": 1002.118, "encoder_q-layer.5": 1380.8699, "encoder_q-layer.6": 1937.6421, "encoder_q-layer.7": 2472.3677, "encoder_q-layer.8": 2572.7607, "encoder_q-layer.9": 2760.4878, "epoch": 0.03, "inbatch_neg_score": 0.4642, "inbatch_pos_score": 0.9424, "learning_rate": 1.7000000000000003e-05, "loss": 6.7638, "norm_diff": 1.4482, "norm_loss": 0.0, "num_token_doc": 66.8782, "num_token_overlap": 14.5639, "num_token_query": 37.4393, "num_token_union": 65.5212, "num_word_context": 202.6341, "num_word_doc": 49.8748, "num_word_query": 28.0563, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2651.3057, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4565, "query_norm": 2.0166, "queue_k_norm": 3.4905, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4393, "sent_len_1": 66.8782, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.3175, "stdk": 0.0627, "stdq": 0.0596, "stdqueue_k": 0.0629, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 29.8828, "active_queue_size": 16384.0, "cl_loss": 6.4201, "doc_norm": 3.3453, "encoder_q-embeddings": 1012.5443, "encoder_q-layer.0": 824.0911, "encoder_q-layer.1": 815.4255, "encoder_q-layer.10": 1343.9619, "encoder_q-layer.11": 1378.5665, "encoder_q-layer.2": 919.2108, "encoder_q-layer.3": 754.5483, "encoder_q-layer.4": 630.5692, "encoder_q-layer.5": 595.1138, "encoder_q-layer.6": 713.7894, "encoder_q-layer.7": 856.7501, "encoder_q-layer.8": 1043.5966, "encoder_q-layer.9": 1027.7789, "epoch": 0.03, "inbatch_neg_score": 0.4534, "inbatch_pos_score": 0.9268, "learning_rate": 1.75e-05, "loss": 6.4201, "norm_diff": 1.3985, "norm_loss": 0.0, "num_token_doc": 66.8832, "num_token_overlap": 14.5574, "num_token_query": 37.2745, "num_token_union": 65.4079, "num_word_context": 202.1206, "num_word_doc": 49.8892, "num_word_query": 27.8907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1349.288, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4463, "query_norm": 1.9469, "queue_k_norm": 3.3502, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2745, "sent_len_1": 66.8832, "sent_len_max_0": 127.99, "sent_len_max_1": 190.85, "stdk": 0.0619, "stdq": 0.0558, "stdqueue_k": 0.0622, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 31.7383, "active_queue_size": 16384.0, "cl_loss": 6.2778, "doc_norm": 3.2156, "encoder_q-embeddings": 404.1912, "encoder_q-layer.0": 386.5216, "encoder_q-layer.1": 350.2813, "encoder_q-layer.10": 1604.4519, "encoder_q-layer.11": 1801.2168, "encoder_q-layer.2": 344.5795, "encoder_q-layer.3": 317.3538, "encoder_q-layer.4": 320.4951, "encoder_q-layer.5": 391.5447, "encoder_q-layer.6": 564.2759, "encoder_q-layer.7": 757.2828, "encoder_q-layer.8": 902.4484, "encoder_q-layer.9": 1030.6428, "epoch": 0.04, "inbatch_neg_score": 0.5335, "inbatch_pos_score": 1.0254, "learning_rate": 1.8e-05, "loss": 6.2778, "norm_diff": 1.2933, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 14.6003, "num_token_query": 37.5181, "num_token_union": 65.4796, "num_word_context": 202.5946, "num_word_doc": 49.8153, "num_word_query": 28.0999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1150.1412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5264, "query_norm": 1.9223, "queue_k_norm": 3.2148, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5181, "sent_len_1": 66.7393, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.8125, "stdk": 0.061, "stdq": 0.0564, "stdqueue_k": 0.0611, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 32.5195, "active_queue_size": 16384.0, "cl_loss": 6.0418, "doc_norm": 3.111, "encoder_q-embeddings": 556.5793, "encoder_q-layer.0": 509.9853, "encoder_q-layer.1": 563.5864, "encoder_q-layer.10": 1463.2948, "encoder_q-layer.11": 1569.0735, "encoder_q-layer.2": 629.9763, "encoder_q-layer.3": 594.8698, "encoder_q-layer.4": 500.2901, "encoder_q-layer.5": 471.5007, "encoder_q-layer.6": 457.6551, "encoder_q-layer.7": 441.8542, "encoder_q-layer.8": 634.2032, "encoder_q-layer.9": 862.7114, "epoch": 0.04, "inbatch_neg_score": 0.3642, "inbatch_pos_score": 0.8574, "learning_rate": 1.85e-05, "loss": 6.0418, "norm_diff": 1.27, "norm_loss": 0.0, "num_token_doc": 66.8237, "num_token_overlap": 14.6684, "num_token_query": 37.5138, "num_token_union": 65.4322, "num_word_context": 202.33, "num_word_doc": 49.8443, "num_word_query": 28.0866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1096.9212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3589, "query_norm": 1.841, "queue_k_norm": 3.0952, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5138, "sent_len_1": 66.8237, "sent_len_max_0": 128.0, "sent_len_max_1": 188.945, "stdk": 0.0603, "stdq": 0.0539, "stdqueue_k": 0.0605, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 33.8867, "active_queue_size": 16384.0, "cl_loss": 5.9112, "doc_norm": 2.9765, "encoder_q-embeddings": 619.3555, "encoder_q-layer.0": 580.2437, "encoder_q-layer.1": 558.0026, "encoder_q-layer.10": 927.8237, "encoder_q-layer.11": 1190.7451, "encoder_q-layer.2": 579.5621, "encoder_q-layer.3": 512.0024, "encoder_q-layer.4": 428.1844, "encoder_q-layer.5": 424.8261, "encoder_q-layer.6": 485.0475, "encoder_q-layer.7": 569.7836, "encoder_q-layer.8": 668.1092, "encoder_q-layer.9": 663.4111, "epoch": 0.04, "inbatch_neg_score": 0.4418, "inbatch_pos_score": 0.939, "learning_rate": 1.9e-05, "loss": 5.9112, "norm_diff": 1.1537, "norm_loss": 0.0, "num_token_doc": 66.888, "num_token_overlap": 14.5371, "num_token_query": 37.3316, "num_token_union": 65.4793, "num_word_context": 202.181, "num_word_doc": 49.8914, "num_word_query": 27.9569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 941.3462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4338, "query_norm": 1.8228, "queue_k_norm": 2.9772, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3316, "sent_len_1": 66.888, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3887, "stdk": 0.0594, "stdq": 0.0531, "stdqueue_k": 0.0598, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 30.7617, "active_queue_size": 16384.0, "cl_loss": 5.7913, "doc_norm": 2.8546, "encoder_q-embeddings": 531.2663, "encoder_q-layer.0": 459.8305, "encoder_q-layer.1": 439.37, "encoder_q-layer.10": 1382.8854, "encoder_q-layer.11": 1542.7871, "encoder_q-layer.2": 526.1471, "encoder_q-layer.3": 501.7122, "encoder_q-layer.4": 583.3542, "encoder_q-layer.5": 756.6078, "encoder_q-layer.6": 893.2902, "encoder_q-layer.7": 1013.2324, "encoder_q-layer.8": 1017.7674, "encoder_q-layer.9": 1064.7181, "epoch": 0.04, "inbatch_neg_score": 0.4773, "inbatch_pos_score": 0.9771, "learning_rate": 1.9500000000000003e-05, "loss": 5.7913, "norm_diff": 1.0458, "norm_loss": 0.0, "num_token_doc": 66.7612, "num_token_overlap": 14.6168, "num_token_query": 37.3506, "num_token_union": 65.4161, "num_word_context": 202.4845, "num_word_doc": 49.814, "num_word_query": 27.9699, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1228.3492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4731, "query_norm": 1.8088, "queue_k_norm": 2.861, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3506, "sent_len_1": 66.7612, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8812, "stdk": 0.0589, "stdq": 0.0519, "stdqueue_k": 0.0591, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 32.9102, "active_queue_size": 16384.0, "cl_loss": 5.6888, "doc_norm": 2.749, "encoder_q-embeddings": 317.5214, "encoder_q-layer.0": 282.442, "encoder_q-layer.1": 292.746, "encoder_q-layer.10": 834.7982, "encoder_q-layer.11": 1039.4519, "encoder_q-layer.2": 309.9899, "encoder_q-layer.3": 294.8406, "encoder_q-layer.4": 289.3497, "encoder_q-layer.5": 282.6963, "encoder_q-layer.6": 330.8308, "encoder_q-layer.7": 467.0014, "encoder_q-layer.8": 618.4742, "encoder_q-layer.9": 708.6074, "epoch": 0.04, "inbatch_neg_score": 0.5323, "inbatch_pos_score": 1.0303, "learning_rate": 2e-05, "loss": 5.6888, "norm_diff": 0.9791, "norm_loss": 0.0, "num_token_doc": 66.9285, "num_token_overlap": 14.5361, "num_token_query": 37.3429, "num_token_union": 65.5459, "num_word_context": 202.8391, "num_word_doc": 49.9552, "num_word_query": 27.9578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 724.6412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5293, "query_norm": 1.7699, "queue_k_norm": 2.7523, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3429, "sent_len_1": 66.9285, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1213, "stdk": 0.0582, "stdq": 0.0516, "stdqueue_k": 0.0582, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 34.4727, "active_queue_size": 16384.0, "cl_loss": 5.5991, "doc_norm": 2.6337, "encoder_q-embeddings": 570.5544, "encoder_q-layer.0": 502.6103, "encoder_q-layer.1": 535.2271, "encoder_q-layer.10": 1186.5621, "encoder_q-layer.11": 1935.99, "encoder_q-layer.2": 568.224, "encoder_q-layer.3": 552.0504, "encoder_q-layer.4": 566.7447, "encoder_q-layer.5": 586.3978, "encoder_q-layer.6": 591.6648, "encoder_q-layer.7": 605.3965, "encoder_q-layer.8": 654.2308, "encoder_q-layer.9": 706.6536, "epoch": 0.04, "inbatch_neg_score": 0.4786, "inbatch_pos_score": 0.9624, "learning_rate": 2.05e-05, "loss": 5.5991, "norm_diff": 0.8958, "norm_loss": 0.0, "num_token_doc": 66.7671, "num_token_overlap": 14.5686, "num_token_query": 37.3388, "num_token_union": 65.4049, "num_word_context": 202.1415, "num_word_doc": 49.8198, "num_word_query": 27.9398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1183.9682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4719, "query_norm": 1.7378, "queue_k_norm": 2.6482, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3388, "sent_len_1": 66.7671, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.8837, "stdk": 0.0576, "stdq": 0.0498, "stdqueue_k": 0.0577, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 34.5703, "active_queue_size": 16384.0, "cl_loss": 5.5365, "doc_norm": 2.5517, "encoder_q-embeddings": 1103.5054, "encoder_q-layer.0": 917.5715, "encoder_q-layer.1": 1060.7404, "encoder_q-layer.10": 2124.5266, "encoder_q-layer.11": 2222.3086, "encoder_q-layer.2": 1091.4628, "encoder_q-layer.3": 933.5118, "encoder_q-layer.4": 911.4762, "encoder_q-layer.5": 974.9194, "encoder_q-layer.6": 1259.8602, "encoder_q-layer.7": 1749.5557, "encoder_q-layer.8": 2070.1924, "encoder_q-layer.9": 2104.1514, "epoch": 0.04, "inbatch_neg_score": 0.4777, "inbatch_pos_score": 0.9775, "learning_rate": 2.1e-05, "loss": 5.5365, "norm_diff": 0.7808, "norm_loss": 0.0, "num_token_doc": 66.7395, "num_token_overlap": 14.5632, "num_token_query": 37.23, "num_token_union": 65.277, "num_word_context": 202.1415, "num_word_doc": 49.7674, "num_word_query": 27.8922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2106.4639, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4746, "query_norm": 1.7709, "queue_k_norm": 2.5516, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.23, "sent_len_1": 66.7395, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.5312, "stdk": 0.057, "stdq": 0.0515, "stdqueue_k": 0.0571, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 35.1562, "active_queue_size": 16384.0, "cl_loss": 5.4467, "doc_norm": 2.4507, "encoder_q-embeddings": 595.5381, "encoder_q-layer.0": 486.1898, "encoder_q-layer.1": 488.1913, "encoder_q-layer.10": 1320.228, "encoder_q-layer.11": 1800.0902, "encoder_q-layer.2": 518.6861, "encoder_q-layer.3": 484.4893, "encoder_q-layer.4": 459.6548, "encoder_q-layer.5": 475.7388, "encoder_q-layer.6": 644.1873, "encoder_q-layer.7": 857.7827, "encoder_q-layer.8": 964.4131, "encoder_q-layer.9": 1052.1121, "epoch": 0.04, "inbatch_neg_score": 0.4856, "inbatch_pos_score": 0.9834, "learning_rate": 2.15e-05, "loss": 5.4467, "norm_diff": 0.717, "norm_loss": 0.0, "num_token_doc": 66.6239, "num_token_overlap": 14.5495, "num_token_query": 37.2819, "num_token_union": 65.3098, "num_word_context": 202.1841, "num_word_doc": 49.7285, "num_word_query": 27.893, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1237.2474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4839, "query_norm": 1.7337, "queue_k_norm": 2.4599, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2819, "sent_len_1": 66.6239, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6075, "stdk": 0.0564, "stdq": 0.0495, "stdqueue_k": 0.0564, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 34.4727, "active_queue_size": 16384.0, "cl_loss": 5.3113, "doc_norm": 2.3679, "encoder_q-embeddings": 461.9868, "encoder_q-layer.0": 390.6267, "encoder_q-layer.1": 403.2888, "encoder_q-layer.10": 1820.4548, "encoder_q-layer.11": 1960.3373, "encoder_q-layer.2": 447.051, "encoder_q-layer.3": 466.9467, "encoder_q-layer.4": 456.6383, "encoder_q-layer.5": 494.158, "encoder_q-layer.6": 573.0762, "encoder_q-layer.7": 825.8218, "encoder_q-layer.8": 1012.09, "encoder_q-layer.9": 1219.2069, "epoch": 0.04, "inbatch_neg_score": 0.4659, "inbatch_pos_score": 0.9604, "learning_rate": 2.2000000000000003e-05, "loss": 5.3113, "norm_diff": 0.623, "norm_loss": 0.0, "num_token_doc": 66.6445, "num_token_overlap": 14.6414, "num_token_query": 37.5738, "num_token_union": 65.4436, "num_word_context": 202.1348, "num_word_doc": 49.7688, "num_word_query": 28.1484, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1326.6078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4626, "query_norm": 1.7449, "queue_k_norm": 2.3754, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5738, "sent_len_1": 66.6445, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5062, "stdk": 0.0554, "stdq": 0.05, "stdqueue_k": 0.0559, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 36.3281, "active_queue_size": 16384.0, "cl_loss": 5.273, "doc_norm": 2.2941, "encoder_q-embeddings": 449.121, "encoder_q-layer.0": 355.3319, "encoder_q-layer.1": 364.7046, "encoder_q-layer.10": 732.2483, "encoder_q-layer.11": 1302.658, "encoder_q-layer.2": 387.5793, "encoder_q-layer.3": 370.1357, "encoder_q-layer.4": 354.1987, "encoder_q-layer.5": 361.156, "encoder_q-layer.6": 445.9653, "encoder_q-layer.7": 557.902, "encoder_q-layer.8": 654.2477, "encoder_q-layer.9": 570.8707, "epoch": 0.04, "inbatch_neg_score": 0.4986, "inbatch_pos_score": 1.0479, "learning_rate": 2.25e-05, "loss": 5.273, "norm_diff": 0.536, "norm_loss": 0.0, "num_token_doc": 66.8951, "num_token_overlap": 14.5555, "num_token_query": 37.178, "num_token_union": 65.3607, "num_word_context": 202.4507, "num_word_doc": 49.8723, "num_word_query": 27.8085, "postclip_grad_norm": 1.0, "preclip_grad_norm": 846.0166, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4985, "query_norm": 1.7581, "queue_k_norm": 2.2957, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.178, "sent_len_1": 66.8951, "sent_len_max_0": 127.9862, "sent_len_max_1": 189.8963, "stdk": 0.0552, "stdq": 0.0507, "stdqueue_k": 0.0553, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 34.375, "active_queue_size": 16384.0, "cl_loss": 5.2091, "doc_norm": 2.2259, "encoder_q-embeddings": 1090.7083, "encoder_q-layer.0": 972.8165, "encoder_q-layer.1": 1163.9729, "encoder_q-layer.10": 929.024, "encoder_q-layer.11": 1393.5096, "encoder_q-layer.2": 1202.9436, "encoder_q-layer.3": 1244.321, "encoder_q-layer.4": 1073.576, "encoder_q-layer.5": 882.7674, "encoder_q-layer.6": 669.6318, "encoder_q-layer.7": 532.85, "encoder_q-layer.8": 586.3356, "encoder_q-layer.9": 676.9122, "epoch": 0.04, "inbatch_neg_score": 0.5164, "inbatch_pos_score": 1.0283, "learning_rate": 2.3000000000000003e-05, "loss": 5.2091, "norm_diff": 0.4604, "norm_loss": 0.0, "num_token_doc": 66.9074, "num_token_overlap": 14.6513, "num_token_query": 37.4115, "num_token_union": 65.4355, "num_word_context": 202.367, "num_word_doc": 49.8638, "num_word_query": 28.006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1471.8793, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5132, "query_norm": 1.7656, "queue_k_norm": 2.2223, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4115, "sent_len_1": 66.9074, "sent_len_max_0": 127.995, "sent_len_max_1": 191.28, "stdk": 0.0552, "stdq": 0.0505, "stdqueue_k": 0.0548, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 36.0352, "active_queue_size": 16384.0, "cl_loss": 5.1444, "doc_norm": 2.1637, "encoder_q-embeddings": 400.5728, "encoder_q-layer.0": 321.4695, "encoder_q-layer.1": 349.8753, "encoder_q-layer.10": 1131.4734, "encoder_q-layer.11": 1420.5286, "encoder_q-layer.2": 435.2189, "encoder_q-layer.3": 436.9766, "encoder_q-layer.4": 461.7507, "encoder_q-layer.5": 595.9495, "encoder_q-layer.6": 821.5169, "encoder_q-layer.7": 1181.4358, "encoder_q-layer.8": 1218.7286, "encoder_q-layer.9": 1052.8505, "epoch": 0.05, "inbatch_neg_score": 0.5206, "inbatch_pos_score": 1.0059, "learning_rate": 2.35e-05, "loss": 5.1444, "norm_diff": 0.4348, "norm_loss": 0.0, "num_token_doc": 66.7472, "num_token_overlap": 14.5802, "num_token_query": 37.3943, "num_token_union": 65.4495, "num_word_context": 202.2811, "num_word_doc": 49.8499, "num_word_query": 27.9944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1176.478, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5161, "query_norm": 1.7289, "queue_k_norm": 2.1599, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3943, "sent_len_1": 66.7472, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.435, "stdk": 0.0544, "stdq": 0.049, "stdqueue_k": 0.0543, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 36.5234, "active_queue_size": 16384.0, "cl_loss": 5.0946, "doc_norm": 2.095, "encoder_q-embeddings": 869.33, "encoder_q-layer.0": 772.8361, "encoder_q-layer.1": 702.9587, "encoder_q-layer.10": 1143.6267, "encoder_q-layer.11": 1826.2979, "encoder_q-layer.2": 698.9544, "encoder_q-layer.3": 660.0782, "encoder_q-layer.4": 549.3288, "encoder_q-layer.5": 487.1705, "encoder_q-layer.6": 536.6849, "encoder_q-layer.7": 570.9401, "encoder_q-layer.8": 731.675, "encoder_q-layer.9": 764.0896, "epoch": 0.05, "inbatch_neg_score": 0.619, "inbatch_pos_score": 1.1201, "learning_rate": 2.4e-05, "loss": 5.0946, "norm_diff": 0.341, "norm_loss": 0.0, "num_token_doc": 66.7315, "num_token_overlap": 14.5774, "num_token_query": 37.3372, "num_token_union": 65.3503, "num_word_context": 202.0932, "num_word_doc": 49.7629, "num_word_query": 27.953, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1251.1207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.754, "queue_k_norm": 2.0991, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3372, "sent_len_1": 66.7315, "sent_len_max_0": 128.0, "sent_len_max_1": 188.985, "stdk": 0.0534, "stdq": 0.0486, "stdqueue_k": 0.0536, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 37.4023, "active_queue_size": 16384.0, "cl_loss": 5.0593, "doc_norm": 2.0477, "encoder_q-embeddings": 704.2371, "encoder_q-layer.0": 603.3229, "encoder_q-layer.1": 610.5471, "encoder_q-layer.10": 750.615, "encoder_q-layer.11": 1166.8639, "encoder_q-layer.2": 703.8049, "encoder_q-layer.3": 661.8497, "encoder_q-layer.4": 633.2542, "encoder_q-layer.5": 646.3475, "encoder_q-layer.6": 637.2615, "encoder_q-layer.7": 724.3855, "encoder_q-layer.8": 775.5428, "encoder_q-layer.9": 672.0539, "epoch": 0.05, "inbatch_neg_score": 0.6259, "inbatch_pos_score": 1.1289, "learning_rate": 2.45e-05, "loss": 5.0593, "norm_diff": 0.289, "norm_loss": 0.0, "num_token_doc": 66.6837, "num_token_overlap": 14.5169, "num_token_query": 37.1451, "num_token_union": 65.2511, "num_word_context": 202.4099, "num_word_doc": 49.7512, "num_word_query": 27.8079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1053.129, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6191, "query_norm": 1.7586, "queue_k_norm": 2.0506, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1451, "sent_len_1": 66.6837, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.8113, "stdk": 0.0531, "stdq": 0.0493, "stdqueue_k": 0.0533, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 38.7695, "active_queue_size": 16384.0, "cl_loss": 4.975, "doc_norm": 2.0062, "encoder_q-embeddings": 414.5367, "encoder_q-layer.0": 321.4695, "encoder_q-layer.1": 343.7471, "encoder_q-layer.10": 579.0951, "encoder_q-layer.11": 1031.0349, "encoder_q-layer.2": 362.0779, "encoder_q-layer.3": 353.7452, "encoder_q-layer.4": 355.6157, "encoder_q-layer.5": 341.67, "encoder_q-layer.6": 367.9996, "encoder_q-layer.7": 428.5528, "encoder_q-layer.8": 525.4254, "encoder_q-layer.9": 474.7501, "epoch": 0.05, "inbatch_neg_score": 0.5564, "inbatch_pos_score": 1.083, "learning_rate": 2.5e-05, "loss": 4.975, "norm_diff": 0.2202, "norm_loss": 0.0, "num_token_doc": 66.8347, "num_token_overlap": 14.6431, "num_token_query": 37.3075, "num_token_union": 65.3602, "num_word_context": 202.4573, "num_word_doc": 49.8734, "num_word_query": 27.9424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 713.9804, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5513, "query_norm": 1.786, "queue_k_norm": 2.0077, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3075, "sent_len_1": 66.8347, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.2812, "stdk": 0.0527, "stdq": 0.0505, "stdqueue_k": 0.0528, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.9281, "doc_norm": 1.9723, "encoder_q-embeddings": 605.6461, "encoder_q-layer.0": 519.095, "encoder_q-layer.1": 581.0419, "encoder_q-layer.10": 1100.2394, "encoder_q-layer.11": 1434.6273, "encoder_q-layer.2": 696.9188, "encoder_q-layer.3": 707.4424, "encoder_q-layer.4": 794.1948, "encoder_q-layer.5": 894.9532, "encoder_q-layer.6": 1080.519, "encoder_q-layer.7": 1122.2725, "encoder_q-layer.8": 1117.4857, "encoder_q-layer.9": 1035.2072, "epoch": 0.05, "inbatch_neg_score": 0.5834, "inbatch_pos_score": 1.1357, "learning_rate": 2.5500000000000003e-05, "loss": 4.9281, "norm_diff": 0.1566, "norm_loss": 0.0, "num_token_doc": 66.6325, "num_token_overlap": 14.638, "num_token_query": 37.543, "num_token_union": 65.4357, "num_word_context": 202.0513, "num_word_doc": 49.7103, "num_word_query": 28.0992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1316.9292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5815, "query_norm": 1.8157, "queue_k_norm": 1.9716, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.543, "sent_len_1": 66.6325, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0762, "stdk": 0.0524, "stdq": 0.0501, "stdqueue_k": 0.0525, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.8832, "doc_norm": 1.9361, "encoder_q-embeddings": 573.9119, "encoder_q-layer.0": 467.3775, "encoder_q-layer.1": 499.567, "encoder_q-layer.10": 609.3385, "encoder_q-layer.11": 1044.5975, "encoder_q-layer.2": 592.9307, "encoder_q-layer.3": 586.6234, "encoder_q-layer.4": 673.6374, "encoder_q-layer.5": 749.7042, "encoder_q-layer.6": 708.5839, "encoder_q-layer.7": 567.0093, "encoder_q-layer.8": 437.1164, "encoder_q-layer.9": 409.3333, "epoch": 0.05, "inbatch_neg_score": 0.6326, "inbatch_pos_score": 1.1963, "learning_rate": 2.6000000000000002e-05, "loss": 4.8832, "norm_diff": 0.1068, "norm_loss": 0.0, "num_token_doc": 67.1015, "num_token_overlap": 14.6112, "num_token_query": 37.4849, "num_token_union": 65.641, "num_word_context": 202.6512, "num_word_doc": 50.0804, "num_word_query": 28.0656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 943.0854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6299, "query_norm": 1.8292, "queue_k_norm": 1.9371, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4849, "sent_len_1": 67.1015, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1362, "stdk": 0.0518, "stdq": 0.0506, "stdqueue_k": 0.052, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 39.5508, "active_queue_size": 16384.0, "cl_loss": 4.8484, "doc_norm": 1.904, "encoder_q-embeddings": 1016.5186, "encoder_q-layer.0": 786.5524, "encoder_q-layer.1": 795.0523, "encoder_q-layer.10": 563.1308, "encoder_q-layer.11": 1033.7555, "encoder_q-layer.2": 929.3167, "encoder_q-layer.3": 863.9568, "encoder_q-layer.4": 668.3424, "encoder_q-layer.5": 575.7807, "encoder_q-layer.6": 617.9641, "encoder_q-layer.7": 595.6153, "encoder_q-layer.8": 498.8057, "encoder_q-layer.9": 389.2416, "epoch": 0.05, "inbatch_neg_score": 0.6437, "inbatch_pos_score": 1.1777, "learning_rate": 2.6500000000000004e-05, "loss": 4.8484, "norm_diff": 0.066, "norm_loss": 0.0, "num_token_doc": 66.5383, "num_token_overlap": 14.6143, "num_token_query": 37.2831, "num_token_union": 65.182, "num_word_context": 202.1871, "num_word_doc": 49.6742, "num_word_query": 27.9169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1140.9567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6377, "query_norm": 1.838, "queue_k_norm": 1.913, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2831, "sent_len_1": 66.5383, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2512, "stdk": 0.0513, "stdq": 0.0497, "stdqueue_k": 0.0517, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 37.5, "active_queue_size": 16384.0, "cl_loss": 4.8083, "doc_norm": 1.8933, "encoder_q-embeddings": 714.944, "encoder_q-layer.0": 621.6216, "encoder_q-layer.1": 583.7958, "encoder_q-layer.10": 1046.5879, "encoder_q-layer.11": 1626.6378, "encoder_q-layer.2": 649.8657, "encoder_q-layer.3": 549.0543, "encoder_q-layer.4": 507.282, "encoder_q-layer.5": 537.652, "encoder_q-layer.6": 682.8667, "encoder_q-layer.7": 900.7488, "encoder_q-layer.8": 936.986, "encoder_q-layer.9": 827.479, "epoch": 0.05, "inbatch_neg_score": 0.6868, "inbatch_pos_score": 1.2207, "learning_rate": 2.7000000000000002e-05, "loss": 4.8083, "norm_diff": 0.0623, "norm_loss": 0.0, "num_token_doc": 66.5977, "num_token_overlap": 14.5521, "num_token_query": 37.2535, "num_token_union": 65.2637, "num_word_context": 202.0667, "num_word_doc": 49.66, "num_word_query": 27.9044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1218.7569, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6826, "query_norm": 1.8396, "queue_k_norm": 1.8907, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2535, "sent_len_1": 66.5977, "sent_len_max_0": 128.0, "sent_len_max_1": 190.815, "stdk": 0.0514, "stdq": 0.0502, "stdqueue_k": 0.0513, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.8156, "doc_norm": 1.8706, "encoder_q-embeddings": 639.0886, "encoder_q-layer.0": 488.4243, "encoder_q-layer.1": 474.2803, "encoder_q-layer.10": 910.5646, "encoder_q-layer.11": 1646.8274, "encoder_q-layer.2": 523.7846, "encoder_q-layer.3": 475.853, "encoder_q-layer.4": 414.19, "encoder_q-layer.5": 377.5888, "encoder_q-layer.6": 344.7805, "encoder_q-layer.7": 328.6808, "encoder_q-layer.8": 431.0617, "encoder_q-layer.9": 531.376, "epoch": 0.05, "inbatch_neg_score": 0.7305, "inbatch_pos_score": 1.2441, "learning_rate": 2.7500000000000004e-05, "loss": 4.8156, "norm_diff": 0.0231, "norm_loss": 0.0, "num_token_doc": 66.6115, "num_token_overlap": 14.4668, "num_token_query": 37.0755, "num_token_union": 65.2367, "num_word_context": 201.989, "num_word_doc": 49.6707, "num_word_query": 27.7448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 995.2567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7251, "query_norm": 1.8527, "queue_k_norm": 1.8722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.0755, "sent_len_1": 66.6115, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3338, "stdk": 0.0508, "stdq": 0.0496, "stdqueue_k": 0.051, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.7509, "doc_norm": 1.8491, "encoder_q-embeddings": 371.3574, "encoder_q-layer.0": 288.5532, "encoder_q-layer.1": 292.5056, "encoder_q-layer.10": 560.8182, "encoder_q-layer.11": 1009.4444, "encoder_q-layer.2": 337.3878, "encoder_q-layer.3": 355.8368, "encoder_q-layer.4": 343.0788, "encoder_q-layer.5": 340.7724, "encoder_q-layer.6": 357.359, "encoder_q-layer.7": 314.8548, "encoder_q-layer.8": 352.1884, "encoder_q-layer.9": 348.0856, "epoch": 0.05, "inbatch_neg_score": 0.7174, "inbatch_pos_score": 1.2666, "learning_rate": 2.8000000000000003e-05, "loss": 4.7509, "norm_diff": 0.0399, "norm_loss": 0.0, "num_token_doc": 66.8182, "num_token_overlap": 14.4964, "num_token_query": 37.1147, "num_token_union": 65.3274, "num_word_context": 202.4451, "num_word_doc": 49.849, "num_word_query": 27.7906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 661.2291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7119, "query_norm": 1.8891, "queue_k_norm": 1.8569, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1147, "sent_len_1": 66.8182, "sent_len_max_0": 127.9862, "sent_len_max_1": 190.4363, "stdk": 0.0502, "stdq": 0.0499, "stdqueue_k": 0.0506, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 38.3789, "active_queue_size": 16384.0, "cl_loss": 4.7232, "doc_norm": 1.8423, "encoder_q-embeddings": 745.5785, "encoder_q-layer.0": 639.7032, "encoder_q-layer.1": 665.9144, "encoder_q-layer.10": 608.0415, "encoder_q-layer.11": 1072.2241, "encoder_q-layer.2": 753.141, "encoder_q-layer.3": 710.9703, "encoder_q-layer.4": 664.5987, "encoder_q-layer.5": 701.946, "encoder_q-layer.6": 803.9515, "encoder_q-layer.7": 967.8867, "encoder_q-layer.8": 925.3938, "encoder_q-layer.9": 645.6507, "epoch": 0.06, "inbatch_neg_score": 0.7545, "inbatch_pos_score": 1.2842, "learning_rate": 2.8499999999999998e-05, "loss": 4.7232, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.8203, "num_token_overlap": 14.5814, "num_token_query": 37.2535, "num_token_union": 65.3849, "num_word_context": 202.9347, "num_word_doc": 49.8678, "num_word_query": 27.8971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1132.2461, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7505, "query_norm": 1.8944, "queue_k_norm": 1.8478, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2535, "sent_len_1": 66.8203, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7825, "stdk": 0.0501, "stdq": 0.0492, "stdqueue_k": 0.0504, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.6789, "doc_norm": 1.8331, "encoder_q-embeddings": 318.5568, "encoder_q-layer.0": 250.0957, "encoder_q-layer.1": 253.3827, "encoder_q-layer.10": 573.7801, "encoder_q-layer.11": 971.2205, "encoder_q-layer.2": 266.6284, "encoder_q-layer.3": 274.5136, "encoder_q-layer.4": 281.2903, "encoder_q-layer.5": 305.7532, "encoder_q-layer.6": 324.6937, "encoder_q-layer.7": 356.8531, "encoder_q-layer.8": 443.4104, "encoder_q-layer.9": 403.9228, "epoch": 0.06, "inbatch_neg_score": 0.8256, "inbatch_pos_score": 1.377, "learning_rate": 2.9e-05, "loss": 4.6789, "norm_diff": 0.0934, "norm_loss": 0.0, "num_token_doc": 66.8097, "num_token_overlap": 14.6596, "num_token_query": 37.3278, "num_token_union": 65.3497, "num_word_context": 202.412, "num_word_doc": 49.8382, "num_word_query": 27.9177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 622.9708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8203, "query_norm": 1.9264, "queue_k_norm": 1.8364, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3278, "sent_len_1": 66.8097, "sent_len_max_0": 127.99, "sent_len_max_1": 189.6175, "stdk": 0.0498, "stdq": 0.0494, "stdqueue_k": 0.0501, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.6526, "doc_norm": 1.8339, "encoder_q-embeddings": 517.8804, "encoder_q-layer.0": 404.1202, "encoder_q-layer.1": 406.0363, "encoder_q-layer.10": 1703.2002, "encoder_q-layer.11": 2690.5164, "encoder_q-layer.2": 451.3089, "encoder_q-layer.3": 447.7145, "encoder_q-layer.4": 427.7508, "encoder_q-layer.5": 443.3784, "encoder_q-layer.6": 489.3874, "encoder_q-layer.7": 565.8181, "encoder_q-layer.8": 718.0406, "encoder_q-layer.9": 931.5643, "epoch": 0.06, "inbatch_neg_score": 0.8427, "inbatch_pos_score": 1.3711, "learning_rate": 2.95e-05, "loss": 4.6526, "norm_diff": 0.0778, "norm_loss": 0.0, "num_token_doc": 66.9702, "num_token_overlap": 14.6922, "num_token_query": 37.711, "num_token_union": 65.639, "num_word_context": 202.5776, "num_word_doc": 49.9857, "num_word_query": 28.275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1428.7325, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.835, "query_norm": 1.9117, "queue_k_norm": 1.8308, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.711, "sent_len_1": 66.9702, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5987, "stdk": 0.0498, "stdq": 0.0487, "stdqueue_k": 0.0499, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 38.9648, "active_queue_size": 16384.0, "cl_loss": 4.6508, "doc_norm": 1.8228, "encoder_q-embeddings": 365.9435, "encoder_q-layer.0": 272.6247, "encoder_q-layer.1": 275.2136, "encoder_q-layer.10": 564.4985, "encoder_q-layer.11": 1038.5259, "encoder_q-layer.2": 290.5119, "encoder_q-layer.3": 291.3985, "encoder_q-layer.4": 285.0354, "encoder_q-layer.5": 295.5736, "encoder_q-layer.6": 339.5328, "encoder_q-layer.7": 390.1882, "encoder_q-layer.8": 467.3952, "encoder_q-layer.9": 414.8716, "epoch": 0.06, "inbatch_neg_score": 0.8458, "inbatch_pos_score": 1.3994, "learning_rate": 3e-05, "loss": 4.6508, "norm_diff": 0.1612, "norm_loss": 0.0, "num_token_doc": 66.6171, "num_token_overlap": 14.5489, "num_token_query": 37.2621, "num_token_union": 65.2777, "num_word_context": 202.1583, "num_word_doc": 49.7191, "num_word_query": 27.9172, "postclip_grad_norm": 1.0, "preclip_grad_norm": 652.2144, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8369, "query_norm": 1.984, "queue_k_norm": 1.8248, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2621, "sent_len_1": 66.6171, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.3675, "stdk": 0.0494, "stdq": 0.0504, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.6353, "doc_norm": 1.8198, "encoder_q-embeddings": 2218.4309, "encoder_q-layer.0": 1932.9164, "encoder_q-layer.1": 1932.2682, "encoder_q-layer.10": 968.4097, "encoder_q-layer.11": 1865.3088, "encoder_q-layer.2": 2004.4854, "encoder_q-layer.3": 1846.7491, "encoder_q-layer.4": 1752.2739, "encoder_q-layer.5": 1709.5449, "encoder_q-layer.6": 1455.422, "encoder_q-layer.7": 1366.7336, "encoder_q-layer.8": 1220.3666, "encoder_q-layer.9": 904.9686, "epoch": 0.06, "inbatch_neg_score": 0.7943, "inbatch_pos_score": 1.333, "learning_rate": 3.05e-05, "loss": 4.6353, "norm_diff": 0.2037, "norm_loss": 0.0, "num_token_doc": 66.872, "num_token_overlap": 14.5681, "num_token_query": 37.3677, "num_token_union": 65.5012, "num_word_context": 202.338, "num_word_doc": 49.9005, "num_word_query": 27.9689, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2502.6171, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7866, "query_norm": 2.0234, "queue_k_norm": 1.8193, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3677, "sent_len_1": 66.872, "sent_len_max_0": 128.0, "sent_len_max_1": 189.01, "stdk": 0.0494, "stdq": 0.0517, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.6395, "doc_norm": 1.8093, "encoder_q-embeddings": 1818.2864, "encoder_q-layer.0": 1484.4154, "encoder_q-layer.1": 1264.2639, "encoder_q-layer.10": 1111.8882, "encoder_q-layer.11": 2070.9324, "encoder_q-layer.2": 1344.0896, "encoder_q-layer.3": 1265.0651, "encoder_q-layer.4": 993.059, "encoder_q-layer.5": 978.9623, "encoder_q-layer.6": 881.89, "encoder_q-layer.7": 854.011, "encoder_q-layer.8": 892.5403, "encoder_q-layer.9": 820.536, "epoch": 0.06, "inbatch_neg_score": 0.7998, "inbatch_pos_score": 1.3291, "learning_rate": 3.1e-05, "loss": 4.6395, "norm_diff": 0.1796, "norm_loss": 0.0, "num_token_doc": 66.718, "num_token_overlap": 14.6024, "num_token_query": 37.4759, "num_token_union": 65.4563, "num_word_context": 202.2851, "num_word_doc": 49.8161, "num_word_query": 28.087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1956.7556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7891, "query_norm": 1.9889, "queue_k_norm": 1.8196, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4759, "sent_len_1": 66.718, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7475, "stdk": 0.049, "stdq": 0.0493, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.6145, "doc_norm": 1.8138, "encoder_q-embeddings": 1417.3677, "encoder_q-layer.0": 1104.8011, "encoder_q-layer.1": 1068.4307, "encoder_q-layer.10": 1193.9717, "encoder_q-layer.11": 2417.1604, "encoder_q-layer.2": 1134.1946, "encoder_q-layer.3": 1145.5901, "encoder_q-layer.4": 900.6331, "encoder_q-layer.5": 810.2991, "encoder_q-layer.6": 767.001, "encoder_q-layer.7": 704.7855, "encoder_q-layer.8": 756.4762, "encoder_q-layer.9": 788.7642, "epoch": 0.06, "inbatch_neg_score": 0.8406, "inbatch_pos_score": 1.3906, "learning_rate": 3.15e-05, "loss": 4.6145, "norm_diff": 0.2014, "norm_loss": 0.0, "num_token_doc": 66.7793, "num_token_overlap": 14.5775, "num_token_query": 37.2613, "num_token_union": 65.347, "num_word_context": 201.7388, "num_word_doc": 49.8181, "num_word_query": 27.8873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1756.8113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8335, "query_norm": 2.0152, "queue_k_norm": 1.8148, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2613, "sent_len_1": 66.7793, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3812, "stdk": 0.0492, "stdq": 0.0502, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.5628, "doc_norm": 1.8067, "encoder_q-embeddings": 635.1678, "encoder_q-layer.0": 469.7411, "encoder_q-layer.1": 485.7419, "encoder_q-layer.10": 1258.2367, "encoder_q-layer.11": 2271.2966, "encoder_q-layer.2": 531.4176, "encoder_q-layer.3": 525.6882, "encoder_q-layer.4": 519.8151, "encoder_q-layer.5": 492.8511, "encoder_q-layer.6": 520.545, "encoder_q-layer.7": 562.4778, "encoder_q-layer.8": 709.3054, "encoder_q-layer.9": 772.952, "epoch": 0.06, "inbatch_neg_score": 0.8688, "inbatch_pos_score": 1.4189, "learning_rate": 3.2000000000000005e-05, "loss": 4.5628, "norm_diff": 0.1947, "norm_loss": 0.0, "num_token_doc": 67.0662, "num_token_overlap": 14.7524, "num_token_query": 37.718, "num_token_union": 65.6382, "num_word_context": 202.0432, "num_word_doc": 49.9614, "num_word_query": 28.2242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1306.33, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8608, "query_norm": 2.0013, "queue_k_norm": 1.8143, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.718, "sent_len_1": 67.0662, "sent_len_max_0": 128.0, "sent_len_max_1": 192.7312, "stdk": 0.0489, "stdq": 0.0485, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.5899, "doc_norm": 1.8097, "encoder_q-embeddings": 15253.2148, "encoder_q-layer.0": 10034.2012, "encoder_q-layer.1": 7253.3174, "encoder_q-layer.10": 1223.6825, "encoder_q-layer.11": 2154.5398, "encoder_q-layer.2": 5194.9023, "encoder_q-layer.3": 3552.0669, "encoder_q-layer.4": 2712.6587, "encoder_q-layer.5": 2231.2495, "encoder_q-layer.6": 1422.8387, "encoder_q-layer.7": 986.124, "encoder_q-layer.8": 968.6763, "encoder_q-layer.9": 932.4886, "epoch": 0.06, "inbatch_neg_score": 0.7811, "inbatch_pos_score": 1.3027, "learning_rate": 3.2500000000000004e-05, "loss": 4.5899, "norm_diff": 0.227, "norm_loss": 0.0, "num_token_doc": 66.7778, "num_token_overlap": 14.5344, "num_token_query": 37.2708, "num_token_union": 65.3675, "num_word_context": 202.1101, "num_word_doc": 49.8357, "num_word_query": 27.8762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10156.2215, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7744, "query_norm": 2.0368, "queue_k_norm": 1.8102, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2708, "sent_len_1": 66.7778, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6, "stdk": 0.049, "stdq": 0.0506, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.5724, "doc_norm": 1.8056, "encoder_q-embeddings": 1413.553, "encoder_q-layer.0": 1143.3566, "encoder_q-layer.1": 1130.834, "encoder_q-layer.10": 942.161, "encoder_q-layer.11": 1841.0896, "encoder_q-layer.2": 1270.7897, "encoder_q-layer.3": 1221.9951, "encoder_q-layer.4": 1013.7094, "encoder_q-layer.5": 919.1025, "encoder_q-layer.6": 861.264, "encoder_q-layer.7": 657.8118, "encoder_q-layer.8": 646.5986, "encoder_q-layer.9": 687.6842, "epoch": 0.06, "inbatch_neg_score": 0.7659, "inbatch_pos_score": 1.2988, "learning_rate": 3.3e-05, "loss": 4.5724, "norm_diff": 0.213, "norm_loss": 0.0, "num_token_doc": 66.6492, "num_token_overlap": 14.5991, "num_token_query": 37.4408, "num_token_union": 65.4013, "num_word_context": 202.2937, "num_word_doc": 49.7459, "num_word_query": 28.0124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1668.8154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7588, "query_norm": 2.0187, "queue_k_norm": 1.8021, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4408, "sent_len_1": 66.6492, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.9437, "stdk": 0.0488, "stdq": 0.0494, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.5257, "doc_norm": 1.8, "encoder_q-embeddings": 948.7932, "encoder_q-layer.0": 732.5041, "encoder_q-layer.1": 691.1128, "encoder_q-layer.10": 1017.3574, "encoder_q-layer.11": 2067.8635, "encoder_q-layer.2": 797.5155, "encoder_q-layer.3": 768.0924, "encoder_q-layer.4": 659.2006, "encoder_q-layer.5": 626.8924, "encoder_q-layer.6": 608.624, "encoder_q-layer.7": 583.9828, "encoder_q-layer.8": 681.5851, "encoder_q-layer.9": 649.5093, "epoch": 0.07, "inbatch_neg_score": 0.7249, "inbatch_pos_score": 1.2754, "learning_rate": 3.35e-05, "loss": 4.5257, "norm_diff": 0.2317, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 14.6485, "num_token_query": 37.4664, "num_token_union": 65.4857, "num_word_context": 202.3733, "num_word_doc": 49.8131, "num_word_query": 28.0611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1354.022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7178, "query_norm": 2.0317, "queue_k_norm": 1.7983, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4664, "sent_len_1": 66.8218, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.1937, "stdk": 0.0488, "stdq": 0.0496, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.5433, "doc_norm": 1.7853, "encoder_q-embeddings": 2248.9893, "encoder_q-layer.0": 1739.0931, "encoder_q-layer.1": 1491.854, "encoder_q-layer.10": 827.4252, "encoder_q-layer.11": 1797.0076, "encoder_q-layer.2": 1623.6344, "encoder_q-layer.3": 1457.1486, "encoder_q-layer.4": 1252.3417, "encoder_q-layer.5": 1244.4561, "encoder_q-layer.6": 1438.5193, "encoder_q-layer.7": 1045.8094, "encoder_q-layer.8": 842.1306, "encoder_q-layer.9": 704.1414, "epoch": 0.07, "inbatch_neg_score": 0.6868, "inbatch_pos_score": 1.1992, "learning_rate": 3.4000000000000007e-05, "loss": 4.5433, "norm_diff": 0.2144, "norm_loss": 0.0, "num_token_doc": 66.5109, "num_token_overlap": 14.5415, "num_token_query": 37.3401, "num_token_union": 65.2991, "num_word_context": 202.0251, "num_word_doc": 49.6077, "num_word_query": 27.9814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2231.4686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6758, "query_norm": 1.9997, "queue_k_norm": 1.7887, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3401, "sent_len_1": 66.5109, "sent_len_max_0": 127.9975, "sent_len_max_1": 186.8438, "stdk": 0.0485, "stdq": 0.0486, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.5278, "doc_norm": 1.7863, "encoder_q-embeddings": 3146.1365, "encoder_q-layer.0": 2507.4143, "encoder_q-layer.1": 2055.7827, "encoder_q-layer.10": 891.3683, "encoder_q-layer.11": 1602.833, "encoder_q-layer.2": 2376.948, "encoder_q-layer.3": 2210.2249, "encoder_q-layer.4": 1615.7347, "encoder_q-layer.5": 1018.5998, "encoder_q-layer.6": 800.8336, "encoder_q-layer.7": 648.7856, "encoder_q-layer.8": 762.1399, "encoder_q-layer.9": 728.425, "epoch": 0.07, "inbatch_neg_score": 0.6771, "inbatch_pos_score": 1.2393, "learning_rate": 3.45e-05, "loss": 4.5278, "norm_diff": 0.2594, "norm_loss": 0.0, "num_token_doc": 66.9228, "num_token_overlap": 14.7462, "num_token_query": 37.6398, "num_token_union": 65.5207, "num_word_context": 202.5507, "num_word_doc": 49.9492, "num_word_query": 28.1916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2809.5048, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6685, "query_norm": 2.0457, "queue_k_norm": 1.7831, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.6398, "sent_len_1": 66.9228, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.435, "stdk": 0.0488, "stdq": 0.0492, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.5193, "doc_norm": 1.7799, "encoder_q-embeddings": 781.8051, "encoder_q-layer.0": 572.7197, "encoder_q-layer.1": 608.7961, "encoder_q-layer.10": 1097.0759, "encoder_q-layer.11": 2199.0359, "encoder_q-layer.2": 651.2006, "encoder_q-layer.3": 626.6122, "encoder_q-layer.4": 577.1963, "encoder_q-layer.5": 560.7289, "encoder_q-layer.6": 570.8516, "encoder_q-layer.7": 577.923, "encoder_q-layer.8": 686.8458, "encoder_q-layer.9": 726.4493, "epoch": 0.07, "inbatch_neg_score": 0.5399, "inbatch_pos_score": 1.0723, "learning_rate": 3.5e-05, "loss": 4.5193, "norm_diff": 0.2398, "norm_loss": 0.0, "num_token_doc": 66.8482, "num_token_overlap": 14.6851, "num_token_query": 37.3934, "num_token_union": 65.3755, "num_word_context": 202.077, "num_word_doc": 49.8774, "num_word_query": 27.9822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1322.1564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5312, "query_norm": 2.0197, "queue_k_norm": 1.771, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3934, "sent_len_1": 66.8482, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.0087, "stdk": 0.0487, "stdq": 0.0494, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.4774, "doc_norm": 1.7622, "encoder_q-embeddings": 2001.3932, "encoder_q-layer.0": 1592.8738, "encoder_q-layer.1": 1379.2058, "encoder_q-layer.10": 893.8911, "encoder_q-layer.11": 1643.3802, "encoder_q-layer.2": 1444.8605, "encoder_q-layer.3": 1185.7802, "encoder_q-layer.4": 956.8987, "encoder_q-layer.5": 807.7946, "encoder_q-layer.6": 762.2473, "encoder_q-layer.7": 592.6076, "encoder_q-layer.8": 585.4762, "encoder_q-layer.9": 569.6211, "epoch": 0.07, "inbatch_neg_score": 0.551, "inbatch_pos_score": 1.0791, "learning_rate": 3.55e-05, "loss": 4.4774, "norm_diff": 0.2863, "norm_loss": 0.0, "num_token_doc": 66.7236, "num_token_overlap": 14.6532, "num_token_query": 37.465, "num_token_union": 65.4124, "num_word_context": 202.5887, "num_word_doc": 49.8122, "num_word_query": 28.0613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1883.9204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5391, "query_norm": 2.0485, "queue_k_norm": 1.7615, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.465, "sent_len_1": 66.7236, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0563, "stdk": 0.0485, "stdq": 0.0486, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.4464, "doc_norm": 1.7457, "encoder_q-embeddings": 1034.8395, "encoder_q-layer.0": 745.3135, "encoder_q-layer.1": 802.5518, "encoder_q-layer.10": 812.4844, "encoder_q-layer.11": 1751.0022, "encoder_q-layer.2": 888.7792, "encoder_q-layer.3": 855.7906, "encoder_q-layer.4": 696.7255, "encoder_q-layer.5": 610.207, "encoder_q-layer.6": 576.3893, "encoder_q-layer.7": 561.9067, "encoder_q-layer.8": 662.608, "encoder_q-layer.9": 601.9418, "epoch": 0.07, "inbatch_neg_score": 0.4719, "inbatch_pos_score": 1.0117, "learning_rate": 3.6e-05, "loss": 4.4464, "norm_diff": 0.2799, "norm_loss": 0.0, "num_token_doc": 66.8415, "num_token_overlap": 14.6451, "num_token_query": 37.445, "num_token_union": 65.4871, "num_word_context": 202.6156, "num_word_doc": 49.9089, "num_word_query": 28.0452, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1334.5186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4648, "query_norm": 2.0256, "queue_k_norm": 1.7492, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.445, "sent_len_1": 66.8415, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.915, "stdk": 0.0483, "stdq": 0.0485, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.4392, "doc_norm": 1.7352, "encoder_q-embeddings": 1640.2213, "encoder_q-layer.0": 1374.7068, "encoder_q-layer.1": 1336.4043, "encoder_q-layer.10": 857.6472, "encoder_q-layer.11": 1642.6206, "encoder_q-layer.2": 1300.4156, "encoder_q-layer.3": 1216.0918, "encoder_q-layer.4": 964.7532, "encoder_q-layer.5": 960.0435, "encoder_q-layer.6": 1023.6939, "encoder_q-layer.7": 738.8181, "encoder_q-layer.8": 618.7168, "encoder_q-layer.9": 520.7698, "epoch": 0.07, "inbatch_neg_score": 0.4652, "inbatch_pos_score": 0.9956, "learning_rate": 3.65e-05, "loss": 4.4392, "norm_diff": 0.3169, "norm_loss": 0.0, "num_token_doc": 66.7527, "num_token_overlap": 14.6908, "num_token_query": 37.5319, "num_token_union": 65.4158, "num_word_context": 202.597, "num_word_doc": 49.8694, "num_word_query": 28.0983, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1761.2857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4534, "query_norm": 2.0522, "queue_k_norm": 1.7387, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5319, "sent_len_1": 66.7527, "sent_len_max_0": 128.0, "sent_len_max_1": 186.5175, "stdk": 0.0483, "stdq": 0.0484, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.4754, "doc_norm": 1.7221, "encoder_q-embeddings": 2865.1956, "encoder_q-layer.0": 2175.4011, "encoder_q-layer.1": 1490.3525, "encoder_q-layer.10": 908.3569, "encoder_q-layer.11": 1787.2153, "encoder_q-layer.2": 1420.0575, "encoder_q-layer.3": 1169.1542, "encoder_q-layer.4": 882.2274, "encoder_q-layer.5": 853.5897, "encoder_q-layer.6": 936.1444, "encoder_q-layer.7": 845.6757, "encoder_q-layer.8": 734.3185, "encoder_q-layer.9": 674.2144, "epoch": 0.07, "inbatch_neg_score": 0.4781, "inbatch_pos_score": 1.0137, "learning_rate": 3.7e-05, "loss": 4.4754, "norm_diff": 0.3395, "norm_loss": 0.0, "num_token_doc": 66.5665, "num_token_overlap": 14.4903, "num_token_query": 37.1999, "num_token_union": 65.2851, "num_word_context": 202.2311, "num_word_doc": 49.663, "num_word_query": 27.8474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2323.5315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4663, "query_norm": 2.0616, "queue_k_norm": 1.7254, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1999, "sent_len_1": 66.5665, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.4963, "stdk": 0.0483, "stdq": 0.0489, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.4219, "doc_norm": 1.7201, "encoder_q-embeddings": 1614.6899, "encoder_q-layer.0": 1089.6908, "encoder_q-layer.1": 964.1539, "encoder_q-layer.10": 1659.7562, "encoder_q-layer.11": 3141.6211, "encoder_q-layer.2": 1063.4594, "encoder_q-layer.3": 912.6082, "encoder_q-layer.4": 743.6994, "encoder_q-layer.5": 681.9847, "encoder_q-layer.6": 729.408, "encoder_q-layer.7": 863.805, "encoder_q-layer.8": 843.3571, "encoder_q-layer.9": 811.9924, "epoch": 0.07, "inbatch_neg_score": 0.5089, "inbatch_pos_score": 1.0322, "learning_rate": 3.7500000000000003e-05, "loss": 4.4219, "norm_diff": 0.3394, "norm_loss": 0.0, "num_token_doc": 66.6432, "num_token_overlap": 14.5981, "num_token_query": 37.3242, "num_token_union": 65.3063, "num_word_context": 202.1657, "num_word_doc": 49.7403, "num_word_query": 27.9535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1984.0842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.498, "query_norm": 2.0596, "queue_k_norm": 1.712, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3242, "sent_len_1": 66.6432, "sent_len_max_0": 127.99, "sent_len_max_1": 189.4275, "stdk": 0.0484, "stdq": 0.0475, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.3809, "doc_norm": 1.699, "encoder_q-embeddings": 1439.0591, "encoder_q-layer.0": 1076.6541, "encoder_q-layer.1": 931.5464, "encoder_q-layer.10": 1058.3062, "encoder_q-layer.11": 2225.0625, "encoder_q-layer.2": 1005.8876, "encoder_q-layer.3": 960.976, "encoder_q-layer.4": 881.0561, "encoder_q-layer.5": 733.899, "encoder_q-layer.6": 592.1884, "encoder_q-layer.7": 501.2309, "encoder_q-layer.8": 601.5783, "encoder_q-layer.9": 606.162, "epoch": 0.07, "inbatch_neg_score": 0.4618, "inbatch_pos_score": 1.0264, "learning_rate": 3.8e-05, "loss": 4.3809, "norm_diff": 0.3719, "norm_loss": 0.0, "num_token_doc": 66.6477, "num_token_overlap": 14.5928, "num_token_query": 37.4253, "num_token_union": 65.3835, "num_word_context": 202.0786, "num_word_doc": 49.7484, "num_word_query": 28.0034, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1639.4894, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4504, "query_norm": 2.0709, "queue_k_norm": 1.7013, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4253, "sent_len_1": 66.6477, "sent_len_max_0": 127.9625, "sent_len_max_1": 187.7425, "stdk": 0.0481, "stdq": 0.0484, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.4022, "doc_norm": 1.6924, "encoder_q-embeddings": 2071.8701, "encoder_q-layer.0": 1563.5728, "encoder_q-layer.1": 1373.1708, "encoder_q-layer.10": 689.7454, "encoder_q-layer.11": 1584.6719, "encoder_q-layer.2": 1335.088, "encoder_q-layer.3": 1185.9187, "encoder_q-layer.4": 874.0331, "encoder_q-layer.5": 720.9398, "encoder_q-layer.6": 760.074, "encoder_q-layer.7": 600.3406, "encoder_q-layer.8": 563.9952, "encoder_q-layer.9": 534.6113, "epoch": 0.08, "inbatch_neg_score": 0.5154, "inbatch_pos_score": 1.0664, "learning_rate": 3.85e-05, "loss": 4.4022, "norm_diff": 0.3952, "norm_loss": 0.0, "num_token_doc": 66.4572, "num_token_overlap": 14.5425, "num_token_query": 37.254, "num_token_union": 65.1895, "num_word_context": 202.0498, "num_word_doc": 49.5592, "num_word_query": 27.8748, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1849.7426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5073, "query_norm": 2.0875, "queue_k_norm": 1.6919, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.254, "sent_len_1": 66.4572, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0712, "stdk": 0.0482, "stdq": 0.048, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.378, "doc_norm": 1.6808, "encoder_q-embeddings": 2205.0071, "encoder_q-layer.0": 1518.3975, "encoder_q-layer.1": 1189.7812, "encoder_q-layer.10": 933.9951, "encoder_q-layer.11": 1795.342, "encoder_q-layer.2": 1188.0887, "encoder_q-layer.3": 1067.3933, "encoder_q-layer.4": 828.3517, "encoder_q-layer.5": 750.1721, "encoder_q-layer.6": 764.7288, "encoder_q-layer.7": 696.5035, "encoder_q-layer.8": 901.6177, "encoder_q-layer.9": 826.585, "epoch": 0.08, "inbatch_neg_score": 0.5457, "inbatch_pos_score": 1.0791, "learning_rate": 3.9000000000000006e-05, "loss": 4.378, "norm_diff": 0.3808, "norm_loss": 0.0, "num_token_doc": 66.6593, "num_token_overlap": 14.6211, "num_token_query": 37.5044, "num_token_union": 65.3987, "num_word_context": 202.2458, "num_word_doc": 49.7367, "num_word_query": 28.1053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1919.7429, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5366, "query_norm": 2.0616, "queue_k_norm": 1.6838, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5044, "sent_len_1": 66.6593, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.555, "stdk": 0.0481, "stdq": 0.0476, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 4.3546, "doc_norm": 1.6734, "encoder_q-embeddings": 2306.5481, "encoder_q-layer.0": 1970.8175, "encoder_q-layer.1": 1698.8723, "encoder_q-layer.10": 711.604, "encoder_q-layer.11": 1521.8254, "encoder_q-layer.2": 1718.9451, "encoder_q-layer.3": 1642.9271, "encoder_q-layer.4": 1483.7146, "encoder_q-layer.5": 1207.3965, "encoder_q-layer.6": 1055.311, "encoder_q-layer.7": 672.9478, "encoder_q-layer.8": 500.9778, "encoder_q-layer.9": 476.9223, "epoch": 0.08, "inbatch_neg_score": 0.6175, "inbatch_pos_score": 1.1924, "learning_rate": 3.9500000000000005e-05, "loss": 4.3546, "norm_diff": 0.371, "norm_loss": 0.0, "num_token_doc": 66.6451, "num_token_overlap": 14.5608, "num_token_query": 37.2582, "num_token_union": 65.2545, "num_word_context": 202.3354, "num_word_doc": 49.7119, "num_word_query": 27.9138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2190.6658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 2.0444, "queue_k_norm": 1.6759, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2582, "sent_len_1": 66.6451, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9175, "stdk": 0.0481, "stdq": 0.0482, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.3653, "doc_norm": 1.6637, "encoder_q-embeddings": 681.064, "encoder_q-layer.0": 516.9697, "encoder_q-layer.1": 564.0974, "encoder_q-layer.10": 747.496, "encoder_q-layer.11": 1529.4509, "encoder_q-layer.2": 575.8076, "encoder_q-layer.3": 596.0728, "encoder_q-layer.4": 592.4814, "encoder_q-layer.5": 581.867, "encoder_q-layer.6": 581.2614, "encoder_q-layer.7": 545.2021, "encoder_q-layer.8": 638.4916, "encoder_q-layer.9": 589.741, "epoch": 0.08, "inbatch_neg_score": 0.6166, "inbatch_pos_score": 1.1807, "learning_rate": 4e-05, "loss": 4.3653, "norm_diff": 0.3988, "norm_loss": 0.0, "num_token_doc": 66.6584, "num_token_overlap": 14.6491, "num_token_query": 37.5303, "num_token_union": 65.3625, "num_word_context": 202.4062, "num_word_doc": 49.7395, "num_word_query": 28.1192, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1064.8687, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.606, "query_norm": 2.0625, "queue_k_norm": 1.6732, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5303, "sent_len_1": 66.6584, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.3413, "stdk": 0.0479, "stdq": 0.0497, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.3565, "doc_norm": 1.6726, "encoder_q-embeddings": 2235.9233, "encoder_q-layer.0": 1938.7468, "encoder_q-layer.1": 2082.8267, "encoder_q-layer.10": 1633.6652, "encoder_q-layer.11": 3430.6965, "encoder_q-layer.2": 2173.7283, "encoder_q-layer.3": 2404.9041, "encoder_q-layer.4": 2036.8708, "encoder_q-layer.5": 1924.3832, "encoder_q-layer.6": 1626.4305, "encoder_q-layer.7": 1155.1434, "encoder_q-layer.8": 1155.4319, "encoder_q-layer.9": 1074.6553, "epoch": 0.08, "inbatch_neg_score": 0.6648, "inbatch_pos_score": 1.2344, "learning_rate": 4.05e-05, "loss": 4.3565, "norm_diff": 0.3487, "norm_loss": 0.0, "num_token_doc": 66.7761, "num_token_overlap": 14.549, "num_token_query": 37.1504, "num_token_union": 65.288, "num_word_context": 202.2033, "num_word_doc": 49.809, "num_word_query": 27.8254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3025.2359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6562, "query_norm": 2.0213, "queue_k_norm": 1.6665, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1504, "sent_len_1": 66.7761, "sent_len_max_0": 127.995, "sent_len_max_1": 189.4325, "stdk": 0.0482, "stdq": 0.0486, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.3188, "doc_norm": 1.6647, "encoder_q-embeddings": 1254.861, "encoder_q-layer.0": 952.8501, "encoder_q-layer.1": 1002.0084, "encoder_q-layer.10": 1543.5103, "encoder_q-layer.11": 3187.4348, "encoder_q-layer.2": 1101.6255, "encoder_q-layer.3": 1168.5964, "encoder_q-layer.4": 1221.3356, "encoder_q-layer.5": 1135.4177, "encoder_q-layer.6": 1137.094, "encoder_q-layer.7": 1159.1562, "encoder_q-layer.8": 1194.2551, "encoder_q-layer.9": 1191.4062, "epoch": 0.08, "inbatch_neg_score": 0.6335, "inbatch_pos_score": 1.167, "learning_rate": 4.1e-05, "loss": 4.3188, "norm_diff": 0.3372, "norm_loss": 0.0, "num_token_doc": 66.5621, "num_token_overlap": 14.5997, "num_token_query": 37.4282, "num_token_union": 65.3496, "num_word_context": 202.2599, "num_word_doc": 49.6735, "num_word_query": 28.0258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2106.9478, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.626, "query_norm": 2.0019, "queue_k_norm": 1.664, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4282, "sent_len_1": 66.5621, "sent_len_max_0": 128.0, "sent_len_max_1": 189.365, "stdk": 0.048, "stdq": 0.0485, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.302, "doc_norm": 1.6589, "encoder_q-embeddings": 1021.2198, "encoder_q-layer.0": 782.9328, "encoder_q-layer.1": 838.1691, "encoder_q-layer.10": 1546.6287, "encoder_q-layer.11": 3202.5669, "encoder_q-layer.2": 925.442, "encoder_q-layer.3": 956.9441, "encoder_q-layer.4": 967.3459, "encoder_q-layer.5": 931.0079, "encoder_q-layer.6": 917.8657, "encoder_q-layer.7": 915.4848, "encoder_q-layer.8": 1077.8219, "encoder_q-layer.9": 1002.2599, "epoch": 0.08, "inbatch_neg_score": 0.6586, "inbatch_pos_score": 1.2324, "learning_rate": 4.15e-05, "loss": 4.302, "norm_diff": 0.3401, "norm_loss": 0.0, "num_token_doc": 66.9351, "num_token_overlap": 14.6622, "num_token_query": 37.653, "num_token_union": 65.5996, "num_word_context": 202.7706, "num_word_doc": 49.9439, "num_word_query": 28.1901, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1939.7336, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6528, "query_norm": 1.999, "queue_k_norm": 1.663, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.653, "sent_len_1": 66.9351, "sent_len_max_0": 127.9775, "sent_len_max_1": 189.39, "stdk": 0.0479, "stdq": 0.0487, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.3252, "doc_norm": 1.6571, "encoder_q-embeddings": 1084.5406, "encoder_q-layer.0": 785.8151, "encoder_q-layer.1": 795.5846, "encoder_q-layer.10": 1327.7955, "encoder_q-layer.11": 3045.2434, "encoder_q-layer.2": 883.299, "encoder_q-layer.3": 895.3297, "encoder_q-layer.4": 886.6577, "encoder_q-layer.5": 804.1041, "encoder_q-layer.6": 824.8848, "encoder_q-layer.7": 861.2903, "encoder_q-layer.8": 1034.3771, "encoder_q-layer.9": 996.8529, "epoch": 0.08, "inbatch_neg_score": 0.6334, "inbatch_pos_score": 1.1855, "learning_rate": 4.2e-05, "loss": 4.3252, "norm_diff": 0.3598, "norm_loss": 0.0, "num_token_doc": 66.4166, "num_token_overlap": 14.5289, "num_token_query": 37.1783, "num_token_union": 65.1391, "num_word_context": 201.8665, "num_word_doc": 49.5924, "num_word_query": 27.8571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1873.2246, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.623, "query_norm": 2.017, "queue_k_norm": 1.6599, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1783, "sent_len_1": 66.4166, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6087, "stdk": 0.0478, "stdq": 0.0489, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.2924, "doc_norm": 1.6563, "encoder_q-embeddings": 1797.7914, "encoder_q-layer.0": 1471.6808, "encoder_q-layer.1": 1661.5273, "encoder_q-layer.10": 1408.7089, "encoder_q-layer.11": 2990.3342, "encoder_q-layer.2": 1832.8798, "encoder_q-layer.3": 1683.6166, "encoder_q-layer.4": 1593.1754, "encoder_q-layer.5": 1357.9723, "encoder_q-layer.6": 1187.1273, "encoder_q-layer.7": 911.7672, "encoder_q-layer.8": 946.3758, "encoder_q-layer.9": 1024.6902, "epoch": 0.08, "inbatch_neg_score": 0.6431, "inbatch_pos_score": 1.2227, "learning_rate": 4.25e-05, "loss": 4.2924, "norm_diff": 0.333, "norm_loss": 0.0, "num_token_doc": 66.9049, "num_token_overlap": 14.6478, "num_token_query": 37.4299, "num_token_union": 65.4664, "num_word_context": 202.7325, "num_word_doc": 49.9482, "num_word_query": 28.0332, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2432.786, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6343, "query_norm": 1.9893, "queue_k_norm": 1.6543, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4299, "sent_len_1": 66.9049, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.315, "stdk": 0.0481, "stdq": 0.048, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.2751, "doc_norm": 1.6532, "encoder_q-embeddings": 2252.8479, "encoder_q-layer.0": 1893.9725, "encoder_q-layer.1": 2272.5454, "encoder_q-layer.10": 1275.0625, "encoder_q-layer.11": 2997.8, "encoder_q-layer.2": 2496.3372, "encoder_q-layer.3": 2206.9436, "encoder_q-layer.4": 1964.5399, "encoder_q-layer.5": 1774.3762, "encoder_q-layer.6": 1467.1327, "encoder_q-layer.7": 1044.1775, "encoder_q-layer.8": 1098.9121, "encoder_q-layer.9": 974.8287, "epoch": 0.08, "inbatch_neg_score": 0.6079, "inbatch_pos_score": 1.166, "learning_rate": 4.3e-05, "loss": 4.2751, "norm_diff": 0.2978, "norm_loss": 0.0, "num_token_doc": 66.762, "num_token_overlap": 14.5473, "num_token_query": 37.3558, "num_token_union": 65.418, "num_word_context": 202.1888, "num_word_doc": 49.7985, "num_word_query": 27.9433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2959.2896, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6016, "query_norm": 1.951, "queue_k_norm": 1.6523, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3558, "sent_len_1": 66.762, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.585, "stdk": 0.048, "stdq": 0.0472, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 4.2469, "doc_norm": 1.6488, "encoder_q-embeddings": 1449.5446, "encoder_q-layer.0": 1079.3058, "encoder_q-layer.1": 1065.7203, "encoder_q-layer.10": 1438.8912, "encoder_q-layer.11": 2875.4871, "encoder_q-layer.2": 1230.2094, "encoder_q-layer.3": 1299.4468, "encoder_q-layer.4": 1318.6611, "encoder_q-layer.5": 1070.5532, "encoder_q-layer.6": 978.2972, "encoder_q-layer.7": 873.4136, "encoder_q-layer.8": 1057.4615, "encoder_q-layer.9": 1052.7888, "epoch": 0.08, "inbatch_neg_score": 0.5917, "inbatch_pos_score": 1.1445, "learning_rate": 4.35e-05, "loss": 4.2469, "norm_diff": 0.3002, "norm_loss": 0.0, "num_token_doc": 66.6919, "num_token_overlap": 14.5748, "num_token_query": 37.3101, "num_token_union": 65.3163, "num_word_context": 202.2413, "num_word_doc": 49.7411, "num_word_query": 27.9057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2063.162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.583, "query_norm": 1.9489, "queue_k_norm": 1.6484, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3101, "sent_len_1": 66.6919, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5238, "stdk": 0.048, "stdq": 0.0478, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.2475, "doc_norm": 1.6421, "encoder_q-embeddings": 879.6918, "encoder_q-layer.0": 628.2173, "encoder_q-layer.1": 652.9994, "encoder_q-layer.10": 1204.2207, "encoder_q-layer.11": 2873.2256, "encoder_q-layer.2": 725.0087, "encoder_q-layer.3": 744.3481, "encoder_q-layer.4": 770.0624, "encoder_q-layer.5": 749.2549, "encoder_q-layer.6": 836.5833, "encoder_q-layer.7": 860.2056, "encoder_q-layer.8": 989.2527, "encoder_q-layer.9": 929.4445, "epoch": 0.09, "inbatch_neg_score": 0.5608, "inbatch_pos_score": 1.1211, "learning_rate": 4.4000000000000006e-05, "loss": 4.2475, "norm_diff": 0.2953, "norm_loss": 0.0, "num_token_doc": 66.6504, "num_token_overlap": 14.6357, "num_token_query": 37.4361, "num_token_union": 65.312, "num_word_context": 202.0697, "num_word_doc": 49.7172, "num_word_query": 28.0254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1672.5063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5537, "query_norm": 1.9374, "queue_k_norm": 1.6451, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4361, "sent_len_1": 66.6504, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.7175, "stdk": 0.0477, "stdq": 0.0484, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.2278, "doc_norm": 1.6428, "encoder_q-embeddings": 813.0558, "encoder_q-layer.0": 571.4836, "encoder_q-layer.1": 578.7214, "encoder_q-layer.10": 1667.4692, "encoder_q-layer.11": 3805.3584, "encoder_q-layer.2": 631.606, "encoder_q-layer.3": 674.6206, "encoder_q-layer.4": 717.2144, "encoder_q-layer.5": 692.5114, "encoder_q-layer.6": 799.3007, "encoder_q-layer.7": 851.7799, "encoder_q-layer.8": 1005.6376, "encoder_q-layer.9": 1013.5685, "epoch": 0.09, "inbatch_neg_score": 0.5448, "inbatch_pos_score": 1.0977, "learning_rate": 4.4500000000000004e-05, "loss": 4.2278, "norm_diff": 0.2511, "norm_loss": 0.0, "num_token_doc": 66.7535, "num_token_overlap": 14.6226, "num_token_query": 37.3292, "num_token_union": 65.3252, "num_word_context": 201.9486, "num_word_doc": 49.7906, "num_word_query": 27.9078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1994.8367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5386, "query_norm": 1.8939, "queue_k_norm": 1.6407, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3292, "sent_len_1": 66.7535, "sent_len_max_0": 127.985, "sent_len_max_1": 189.97, "stdk": 0.0479, "stdq": 0.0462, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.2155, "doc_norm": 1.631, "encoder_q-embeddings": 2373.2893, "encoder_q-layer.0": 1965.3123, "encoder_q-layer.1": 1924.9783, "encoder_q-layer.10": 1443.259, "encoder_q-layer.11": 3514.1433, "encoder_q-layer.2": 2365.3289, "encoder_q-layer.3": 2193.4097, "encoder_q-layer.4": 2351.4199, "encoder_q-layer.5": 1961.976, "encoder_q-layer.6": 1909.4464, "encoder_q-layer.7": 1284.4661, "encoder_q-layer.8": 1028.7711, "encoder_q-layer.9": 1005.0036, "epoch": 0.09, "inbatch_neg_score": 0.5191, "inbatch_pos_score": 1.0781, "learning_rate": 4.5e-05, "loss": 4.2155, "norm_diff": 0.2672, "norm_loss": 0.0, "num_token_doc": 66.6662, "num_token_overlap": 14.5257, "num_token_query": 37.1948, "num_token_union": 65.2787, "num_word_context": 202.2299, "num_word_doc": 49.7105, "num_word_query": 27.8087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3070.8531, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5117, "query_norm": 1.8982, "queue_k_norm": 1.6382, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1948, "sent_len_1": 66.6662, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5513, "stdk": 0.0478, "stdq": 0.0469, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.1693, "doc_norm": 1.6308, "encoder_q-embeddings": 815.0953, "encoder_q-layer.0": 549.7354, "encoder_q-layer.1": 576.8614, "encoder_q-layer.10": 1405.7351, "encoder_q-layer.11": 3146.583, "encoder_q-layer.2": 621.7225, "encoder_q-layer.3": 646.5054, "encoder_q-layer.4": 727.7189, "encoder_q-layer.5": 669.773, "encoder_q-layer.6": 745.0593, "encoder_q-layer.7": 805.8165, "encoder_q-layer.8": 967.1487, "encoder_q-layer.9": 962.1588, "epoch": 0.09, "inbatch_neg_score": 0.5443, "inbatch_pos_score": 1.085, "learning_rate": 4.55e-05, "loss": 4.1693, "norm_diff": 0.2901, "norm_loss": 0.0, "num_token_doc": 66.7751, "num_token_overlap": 14.6038, "num_token_query": 37.5592, "num_token_union": 65.5274, "num_word_context": 202.6877, "num_word_doc": 49.8442, "num_word_query": 28.1181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1745.8013, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5371, "query_norm": 1.9208, "queue_k_norm": 1.6326, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5592, "sent_len_1": 66.7751, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6987, "stdk": 0.0478, "stdq": 0.0471, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.1851, "doc_norm": 1.6224, "encoder_q-embeddings": 884.2663, "encoder_q-layer.0": 626.798, "encoder_q-layer.1": 657.7001, "encoder_q-layer.10": 1161.865, "encoder_q-layer.11": 2782.136, "encoder_q-layer.2": 729.6589, "encoder_q-layer.3": 765.7062, "encoder_q-layer.4": 788.2681, "encoder_q-layer.5": 706.7485, "encoder_q-layer.6": 743.5828, "encoder_q-layer.7": 751.7181, "encoder_q-layer.8": 898.2056, "encoder_q-layer.9": 826.8012, "epoch": 0.09, "inbatch_neg_score": 0.5369, "inbatch_pos_score": 1.0859, "learning_rate": 4.600000000000001e-05, "loss": 4.1851, "norm_diff": 0.2707, "norm_loss": 0.0, "num_token_doc": 66.8071, "num_token_overlap": 14.5762, "num_token_query": 37.4164, "num_token_union": 65.4441, "num_word_context": 202.4628, "num_word_doc": 49.8466, "num_word_query": 28.0068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1628.6544, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.8931, "queue_k_norm": 1.6277, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4164, "sent_len_1": 66.8071, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.5525, "stdk": 0.0478, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.1607, "doc_norm": 1.6235, "encoder_q-embeddings": 2971.6997, "encoder_q-layer.0": 2559.5176, "encoder_q-layer.1": 2492.8428, "encoder_q-layer.10": 1152.4999, "encoder_q-layer.11": 2810.3452, "encoder_q-layer.2": 2686.4426, "encoder_q-layer.3": 2683.9307, "encoder_q-layer.4": 2365.1826, "encoder_q-layer.5": 2069.9858, "encoder_q-layer.6": 1668.7732, "encoder_q-layer.7": 1191.7412, "encoder_q-layer.8": 1056.5234, "encoder_q-layer.9": 945.3089, "epoch": 0.09, "inbatch_neg_score": 0.5247, "inbatch_pos_score": 1.1016, "learning_rate": 4.6500000000000005e-05, "loss": 4.1607, "norm_diff": 0.2418, "norm_loss": 0.0, "num_token_doc": 66.8772, "num_token_overlap": 14.6956, "num_token_query": 37.5138, "num_token_union": 65.4795, "num_word_context": 202.142, "num_word_doc": 49.9562, "num_word_query": 28.1017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3341.8041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.521, "query_norm": 1.8653, "queue_k_norm": 1.625, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5138, "sent_len_1": 66.8772, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3887, "stdk": 0.0479, "stdq": 0.0469, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1322, "doc_norm": 1.6183, "encoder_q-embeddings": 1480.1936, "encoder_q-layer.0": 1192.5105, "encoder_q-layer.1": 1319.7439, "encoder_q-layer.10": 1042.7439, "encoder_q-layer.11": 2556.4397, "encoder_q-layer.2": 1470.4845, "encoder_q-layer.3": 1368.3354, "encoder_q-layer.4": 1403.4897, "encoder_q-layer.5": 1346.1935, "encoder_q-layer.6": 1479.167, "encoder_q-layer.7": 1283.9127, "encoder_q-layer.8": 1017.6416, "encoder_q-layer.9": 841.1539, "epoch": 0.09, "inbatch_neg_score": 0.5399, "inbatch_pos_score": 1.1055, "learning_rate": 4.7e-05, "loss": 4.1322, "norm_diff": 0.2294, "norm_loss": 0.0, "num_token_doc": 66.9753, "num_token_overlap": 14.6026, "num_token_query": 37.4032, "num_token_union": 65.5488, "num_word_context": 202.6764, "num_word_doc": 49.9914, "num_word_query": 28.018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2121.5707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5327, "query_norm": 1.8477, "queue_k_norm": 1.6198, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4032, "sent_len_1": 66.9753, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.8313, "stdk": 0.0479, "stdq": 0.047, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.1546, "doc_norm": 1.6181, "encoder_q-embeddings": 916.3379, "encoder_q-layer.0": 626.8359, "encoder_q-layer.1": 656.3718, "encoder_q-layer.10": 1614.6013, "encoder_q-layer.11": 3542.9329, "encoder_q-layer.2": 706.7803, "encoder_q-layer.3": 726.7607, "encoder_q-layer.4": 760.955, "encoder_q-layer.5": 748.0721, "encoder_q-layer.6": 835.7164, "encoder_q-layer.7": 930.3179, "encoder_q-layer.8": 1123.2369, "encoder_q-layer.9": 1037.9281, "epoch": 0.09, "inbatch_neg_score": 0.5354, "inbatch_pos_score": 1.0781, "learning_rate": 4.75e-05, "loss": 4.1546, "norm_diff": 0.2295, "norm_loss": 0.0, "num_token_doc": 66.7526, "num_token_overlap": 14.4506, "num_token_query": 36.939, "num_token_union": 65.1888, "num_word_context": 202.1047, "num_word_doc": 49.8, "num_word_query": 27.6212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1939.4342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.8476, "queue_k_norm": 1.6147, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 36.939, "sent_len_1": 66.7526, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9875, "stdk": 0.0479, "stdq": 0.0459, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 4.1153, "doc_norm": 1.6064, "encoder_q-embeddings": 4241.3472, "encoder_q-layer.0": 3148.354, "encoder_q-layer.1": 3141.2986, "encoder_q-layer.10": 1118.6422, "encoder_q-layer.11": 2994.375, "encoder_q-layer.2": 3856.0002, "encoder_q-layer.3": 3963.8823, "encoder_q-layer.4": 3355.6797, "encoder_q-layer.5": 3307.4929, "encoder_q-layer.6": 2513.6091, "encoder_q-layer.7": 1345.6967, "encoder_q-layer.8": 1033.6429, "encoder_q-layer.9": 886.2313, "epoch": 0.09, "inbatch_neg_score": 0.5001, "inbatch_pos_score": 1.0547, "learning_rate": 4.8e-05, "loss": 4.1153, "norm_diff": 0.1718, "norm_loss": 0.0, "num_token_doc": 66.8875, "num_token_overlap": 14.5938, "num_token_query": 37.3932, "num_token_union": 65.4551, "num_word_context": 202.4151, "num_word_doc": 49.9178, "num_word_query": 27.9887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4603.8047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4939, "query_norm": 1.7782, "queue_k_norm": 1.6117, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3932, "sent_len_1": 66.8875, "sent_len_max_0": 128.0, "sent_len_max_1": 190.26, "stdk": 0.0477, "stdq": 0.0458, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.1276, "doc_norm": 1.6014, "encoder_q-embeddings": 1204.3685, "encoder_q-layer.0": 876.1662, "encoder_q-layer.1": 920.3129, "encoder_q-layer.10": 1042.2371, "encoder_q-layer.11": 2725.3149, "encoder_q-layer.2": 975.8513, "encoder_q-layer.3": 979.4374, "encoder_q-layer.4": 937.1344, "encoder_q-layer.5": 853.9367, "encoder_q-layer.6": 945.2433, "encoder_q-layer.7": 883.3406, "encoder_q-layer.8": 891.0592, "encoder_q-layer.9": 807.4559, "epoch": 0.09, "inbatch_neg_score": 0.5064, "inbatch_pos_score": 1.0449, "learning_rate": 4.85e-05, "loss": 4.1276, "norm_diff": 0.1617, "norm_loss": 0.0, "num_token_doc": 66.8005, "num_token_overlap": 14.5524, "num_token_query": 37.2859, "num_token_union": 65.3866, "num_word_context": 202.4027, "num_word_doc": 49.8814, "num_word_query": 27.9363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1791.9602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4993, "query_norm": 1.7631, "queue_k_norm": 1.6043, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2859, "sent_len_1": 66.8005, "sent_len_max_0": 127.9887, "sent_len_max_1": 187.9137, "stdk": 0.0476, "stdq": 0.0455, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 4.1018, "doc_norm": 1.5962, "encoder_q-embeddings": 783.582, "encoder_q-layer.0": 538.7242, "encoder_q-layer.1": 538.5837, "encoder_q-layer.10": 978.8193, "encoder_q-layer.11": 2676.4946, "encoder_q-layer.2": 605.8077, "encoder_q-layer.3": 656.6118, "encoder_q-layer.4": 684.366, "encoder_q-layer.5": 683.8393, "encoder_q-layer.6": 706.2985, "encoder_q-layer.7": 767.1553, "encoder_q-layer.8": 894.2768, "encoder_q-layer.9": 781.2374, "epoch": 0.1, "inbatch_neg_score": 0.506, "inbatch_pos_score": 1.0693, "learning_rate": 4.9e-05, "loss": 4.1018, "norm_diff": 0.1486, "norm_loss": 0.0, "num_token_doc": 66.6533, "num_token_overlap": 14.6254, "num_token_query": 37.5007, "num_token_union": 65.3932, "num_word_context": 202.2029, "num_word_doc": 49.7462, "num_word_query": 28.0673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1490.7744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4995, "query_norm": 1.7447, "queue_k_norm": 1.5983, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5007, "sent_len_1": 66.6533, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.25, "stdk": 0.0476, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.0837, "doc_norm": 1.5967, "encoder_q-embeddings": 1089.1155, "encoder_q-layer.0": 800.6879, "encoder_q-layer.1": 826.5876, "encoder_q-layer.10": 1039.0956, "encoder_q-layer.11": 2537.2559, "encoder_q-layer.2": 938.2381, "encoder_q-layer.3": 951.278, "encoder_q-layer.4": 970.8276, "encoder_q-layer.5": 1027.746, "encoder_q-layer.6": 1092.3031, "encoder_q-layer.7": 1049.5145, "encoder_q-layer.8": 1000.5275, "encoder_q-layer.9": 846.3679, "epoch": 0.1, "inbatch_neg_score": 0.5016, "inbatch_pos_score": 1.0684, "learning_rate": 4.9500000000000004e-05, "loss": 4.0837, "norm_diff": 0.1237, "norm_loss": 0.0, "num_token_doc": 67.1128, "num_token_overlap": 14.6429, "num_token_query": 37.3302, "num_token_union": 65.5272, "num_word_context": 202.5724, "num_word_doc": 50.0292, "num_word_query": 27.9428, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1705.3763, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4958, "query_norm": 1.7204, "queue_k_norm": 1.5936, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3302, "sent_len_1": 67.1128, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.7525, "stdk": 0.0478, "stdq": 0.0451, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.0536, "doc_norm": 1.5838, "encoder_q-embeddings": 886.4033, "encoder_q-layer.0": 612.572, "encoder_q-layer.1": 646.3489, "encoder_q-layer.10": 1164.1786, "encoder_q-layer.11": 3141.002, "encoder_q-layer.2": 681.1077, "encoder_q-layer.3": 690.7387, "encoder_q-layer.4": 727.7529, "encoder_q-layer.5": 746.2166, "encoder_q-layer.6": 841.1957, "encoder_q-layer.7": 928.5804, "encoder_q-layer.8": 1053.9119, "encoder_q-layer.9": 916.6742, "epoch": 0.1, "inbatch_neg_score": 0.5111, "inbatch_pos_score": 1.0654, "learning_rate": 5e-05, "loss": 4.0536, "norm_diff": 0.0981, "norm_loss": 0.0, "num_token_doc": 66.7059, "num_token_overlap": 14.611, "num_token_query": 37.3196, "num_token_union": 65.3119, "num_word_context": 202.0535, "num_word_doc": 49.8049, "num_word_query": 27.9321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1726.8421, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5049, "query_norm": 1.682, "queue_k_norm": 1.5895, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3196, "sent_len_1": 66.7059, "sent_len_max_0": 127.985, "sent_len_max_1": 189.3237, "stdk": 0.0474, "stdq": 0.0439, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 28.1597, "dev_samples_per_second": 2.273, "dev_steps_per_second": 0.036, "epoch": 0.1, "step": 10000, "test_accuracy": 91.22314453125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5108627080917358, "test_doc_norm": 1.526336431503296, "test_inbatch_neg_score": 0.8314028978347778, "test_inbatch_pos_score": 1.6135749816894531, "test_loss": 0.5108627080917358, "test_loss_align": 1.9625189304351807, "test_loss_unif": 3.5019211769104004, "test_loss_unif_q@queue": 3.5019211769104004, "test_norm_diff": 0.15878459811210632, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.49930763244628906, "test_query_norm": 1.6851210594177246, "test_queue_k_norm": 1.5889222621917725, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.039925768971443176, "test_stdq": 0.03885919600725174, "test_stdqueue_k": 0.04782252758741379, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.1597, "dev_samples_per_second": 2.273, "dev_steps_per_second": 0.036, "epoch": 0.1, "eval_beir-arguana_ndcg@10": 0.32147, "eval_beir-arguana_recall@10": 0.56615, "eval_beir-arguana_recall@100": 0.88762, "eval_beir-arguana_recall@20": 0.72048, "eval_beir-avg_ndcg@10": 0.25940375000000004, "eval_beir-avg_recall@10": 0.3241794166666666, "eval_beir-avg_recall@100": 0.5253615, "eval_beir-avg_recall@20": 0.38922475, "eval_beir-cqadupstack_ndcg@10": 0.17514749999999998, "eval_beir-cqadupstack_recall@10": 0.24848416666666664, "eval_beir-cqadupstack_recall@100": 0.459725, "eval_beir-cqadupstack_recall@20": 0.30282749999999997, "eval_beir-fiqa_ndcg@10": 0.15612, "eval_beir-fiqa_recall@10": 0.19925, "eval_beir-fiqa_recall@100": 0.4506, "eval_beir-fiqa_recall@20": 0.27102, "eval_beir-nfcorpus_ndcg@10": 0.24104, "eval_beir-nfcorpus_recall@10": 0.11008, "eval_beir-nfcorpus_recall@100": 0.23929, "eval_beir-nfcorpus_recall@20": 0.13825, "eval_beir-nq_ndcg@10": 0.15322, "eval_beir-nq_recall@10": 0.26178, "eval_beir-nq_recall@100": 0.58654, "eval_beir-nq_recall@20": 0.35933, "eval_beir-quora_ndcg@10": 0.44085, "eval_beir-quora_recall@10": 0.58079, "eval_beir-quora_recall@100": 0.84031, "eval_beir-quora_recall@20": 0.6733, "eval_beir-scidocs_ndcg@10": 0.10786, "eval_beir-scidocs_recall@10": 0.11447, "eval_beir-scidocs_recall@100": 0.29325, "eval_beir-scidocs_recall@20": 0.16232, "eval_beir-scifact_ndcg@10": 0.49131, "eval_beir-scifact_recall@10": 0.651, "eval_beir-scifact_recall@100": 0.86956, "eval_beir-scifact_recall@20": 0.73044, "eval_beir-trec-covid_ndcg@10": 0.35726, "eval_beir-trec-covid_recall@10": 0.404, "eval_beir-trec-covid_recall@100": 0.2704, "eval_beir-trec-covid_recall@20": 0.377, "eval_beir-webis-touche2020_ndcg@10": 0.14976, "eval_beir-webis-touche2020_recall@10": 0.10579, "eval_beir-webis-touche2020_recall@100": 0.35632, "eval_beir-webis-touche2020_recall@20": 0.15728, "eval_senteval-avg_sts": 0.7088714165528514, "eval_senteval-sickr_spearman": 0.6730934942543808, "eval_senteval-stsb_spearman": 0.7446493388513218, "step": 10000, "test_accuracy": 91.22314453125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5108627080917358, "test_doc_norm": 1.526336431503296, "test_inbatch_neg_score": 0.8314028978347778, "test_inbatch_pos_score": 1.6135749816894531, "test_loss": 0.5108627080917358, "test_loss_align": 1.9625189304351807, "test_loss_unif": 3.5019211769104004, "test_loss_unif_q@queue": 3.5019211769104004, "test_norm_diff": 0.15878459811210632, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.49930763244628906, "test_query_norm": 1.6851210594177246, "test_queue_k_norm": 1.5889222621917725, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.039925768971443176, "test_stdq": 0.03885919600725174, "test_stdqueue_k": 0.04782252758741379, "test_stdqueue_q": 0.0 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.0592, "doc_norm": 1.5777, "encoder_q-embeddings": 1709.1012, "encoder_q-layer.0": 1146.5707, "encoder_q-layer.1": 1165.1985, "encoder_q-layer.10": 2022.204, "encoder_q-layer.11": 5954.5366, "encoder_q-layer.2": 1305.8105, "encoder_q-layer.3": 1320.8368, "encoder_q-layer.4": 1380.3439, "encoder_q-layer.5": 1406.4049, "encoder_q-layer.6": 1551.0844, "encoder_q-layer.7": 1630.9446, "encoder_q-layer.8": 1776.7981, "encoder_q-layer.9": 1536.1566, "epoch": 0.1, "inbatch_neg_score": 0.4936, "inbatch_pos_score": 1.0459, "learning_rate": 4.994444444444445e-05, "loss": 4.0592, "norm_diff": 0.0884, "norm_loss": 0.0, "num_token_doc": 66.7054, "num_token_overlap": 14.5579, "num_token_query": 37.2337, "num_token_union": 65.2966, "num_word_context": 202.0564, "num_word_doc": 49.7691, "num_word_query": 27.8774, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3250.9595, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4873, "query_norm": 1.6661, "queue_k_norm": 1.5804, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2337, "sent_len_1": 66.7054, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1438, "stdk": 0.0475, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 4.0086, "doc_norm": 1.5779, "encoder_q-embeddings": 5764.4717, "encoder_q-layer.0": 4513.4082, "encoder_q-layer.1": 5072.6958, "encoder_q-layer.10": 1926.7689, "encoder_q-layer.11": 5053.3584, "encoder_q-layer.2": 5437.5972, "encoder_q-layer.3": 4702.0508, "encoder_q-layer.4": 4660.7783, "encoder_q-layer.5": 4004.6663, "encoder_q-layer.6": 3179.7163, "encoder_q-layer.7": 2118.1311, "encoder_q-layer.8": 1836.4116, "encoder_q-layer.9": 1570.7213, "epoch": 0.1, "inbatch_neg_score": 0.4642, "inbatch_pos_score": 1.0273, "learning_rate": 4.9888888888888894e-05, "loss": 4.0086, "norm_diff": 0.0274, "norm_loss": 0.0, "num_token_doc": 66.9794, "num_token_overlap": 14.7176, "num_token_query": 37.6606, "num_token_union": 65.5803, "num_word_context": 202.6132, "num_word_doc": 49.9256, "num_word_query": 28.2056, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6355.4142, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4622, "query_norm": 1.6053, "queue_k_norm": 1.5764, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.6606, "sent_len_1": 66.9794, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9575, "stdk": 0.0478, "stdq": 0.0432, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 4.032, "doc_norm": 1.5691, "encoder_q-embeddings": 1633.6938, "encoder_q-layer.0": 1122.9735, "encoder_q-layer.1": 1196.3019, "encoder_q-layer.10": 1822.3285, "encoder_q-layer.11": 4974.1074, "encoder_q-layer.2": 1372.0358, "encoder_q-layer.3": 1473.7336, "encoder_q-layer.4": 1548.771, "encoder_q-layer.5": 1465.0557, "encoder_q-layer.6": 1511.1187, "encoder_q-layer.7": 1684.9214, "encoder_q-layer.8": 1764.3654, "encoder_q-layer.9": 1552.2867, "epoch": 0.1, "inbatch_neg_score": 0.4549, "inbatch_pos_score": 1.0361, "learning_rate": 4.9833333333333336e-05, "loss": 4.032, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.6085, "num_token_overlap": 14.5651, "num_token_query": 37.4848, "num_token_union": 65.4447, "num_word_context": 202.6246, "num_word_doc": 49.7089, "num_word_query": 28.0715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2977.0467, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4524, "query_norm": 1.5938, "queue_k_norm": 1.572, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4848, "sent_len_1": 66.6085, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5025, "stdk": 0.0477, "stdq": 0.043, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 4.0483, "doc_norm": 1.5745, "encoder_q-embeddings": 4845.6289, "encoder_q-layer.0": 3906.9124, "encoder_q-layer.1": 4752.3379, "encoder_q-layer.10": 2127.2178, "encoder_q-layer.11": 5284.5884, "encoder_q-layer.2": 4717.4907, "encoder_q-layer.3": 3778.6396, "encoder_q-layer.4": 2942.8604, "encoder_q-layer.5": 2344.5071, "encoder_q-layer.6": 2261.7847, "encoder_q-layer.7": 1919.4623, "encoder_q-layer.8": 2139.324, "encoder_q-layer.9": 1780.5345, "epoch": 0.1, "inbatch_neg_score": 0.4675, "inbatch_pos_score": 1.0371, "learning_rate": 4.977777777777778e-05, "loss": 4.0483, "norm_diff": 0.0683, "norm_loss": 0.0, "num_token_doc": 66.6351, "num_token_overlap": 14.5398, "num_token_query": 37.2601, "num_token_union": 65.2559, "num_word_context": 202.2373, "num_word_doc": 49.7491, "num_word_query": 27.899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5521.9595, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.46, "query_norm": 1.6428, "queue_k_norm": 1.5696, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2601, "sent_len_1": 66.6351, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.0625, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 4.0324, "doc_norm": 1.5645, "encoder_q-embeddings": 2880.4346, "encoder_q-layer.0": 2152.6221, "encoder_q-layer.1": 2482.0137, "encoder_q-layer.10": 1927.708, "encoder_q-layer.11": 6068.4766, "encoder_q-layer.2": 2745.3118, "encoder_q-layer.3": 3010.554, "encoder_q-layer.4": 3431.5808, "encoder_q-layer.5": 3346.2849, "encoder_q-layer.6": 3180.0969, "encoder_q-layer.7": 2731.8035, "encoder_q-layer.8": 2552.2764, "encoder_q-layer.9": 1599.9967, "epoch": 0.1, "inbatch_neg_score": 0.4685, "inbatch_pos_score": 1.0527, "learning_rate": 4.972222222222223e-05, "loss": 4.0324, "norm_diff": 0.0892, "norm_loss": 0.0, "num_token_doc": 66.622, "num_token_overlap": 14.613, "num_token_query": 37.4385, "num_token_union": 65.3665, "num_word_context": 202.2355, "num_word_doc": 49.7139, "num_word_query": 28.034, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4619.5782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4624, "query_norm": 1.6537, "queue_k_norm": 1.5645, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4385, "sent_len_1": 66.622, "sent_len_max_0": 128.0, "sent_len_max_1": 187.19, "stdk": 0.0477, "stdq": 0.0449, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 4.0591, "doc_norm": 1.5732, "encoder_q-embeddings": 2702.0071, "encoder_q-layer.0": 2111.3408, "encoder_q-layer.1": 2264.282, "encoder_q-layer.10": 2185.6956, "encoder_q-layer.11": 6764.2729, "encoder_q-layer.2": 2527.3403, "encoder_q-layer.3": 2534.1138, "encoder_q-layer.4": 2577.3992, "encoder_q-layer.5": 2436.802, "encoder_q-layer.6": 2292.8369, "encoder_q-layer.7": 2177.8899, "encoder_q-layer.8": 2175.9148, "encoder_q-layer.9": 1563.2371, "epoch": 0.1, "inbatch_neg_score": 0.4393, "inbatch_pos_score": 1.0059, "learning_rate": 4.966666666666667e-05, "loss": 4.0591, "norm_diff": 0.0508, "norm_loss": 0.0, "num_token_doc": 66.6199, "num_token_overlap": 14.5478, "num_token_query": 37.2449, "num_token_union": 65.2659, "num_word_context": 202.1891, "num_word_doc": 49.752, "num_word_query": 27.8786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4321.7057, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4341, "query_norm": 1.624, "queue_k_norm": 1.5632, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2449, "sent_len_1": 66.6199, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2738, "stdk": 0.0481, "stdq": 0.0438, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 4.0238, "doc_norm": 1.5705, "encoder_q-embeddings": 1711.5656, "encoder_q-layer.0": 1223.6274, "encoder_q-layer.1": 1327.5057, "encoder_q-layer.10": 2431.6384, "encoder_q-layer.11": 6945.8628, "encoder_q-layer.2": 1550.6633, "encoder_q-layer.3": 1710.8737, "encoder_q-layer.4": 1734.2487, "encoder_q-layer.5": 1832.1228, "encoder_q-layer.6": 2036.5142, "encoder_q-layer.7": 1970.8174, "encoder_q-layer.8": 2013.155, "encoder_q-layer.9": 1729.3938, "epoch": 0.1, "inbatch_neg_score": 0.4551, "inbatch_pos_score": 1.0186, "learning_rate": 4.961111111111111e-05, "loss": 4.0238, "norm_diff": 0.0445, "norm_loss": 0.0, "num_token_doc": 66.621, "num_token_overlap": 14.542, "num_token_query": 37.3171, "num_token_union": 65.3035, "num_word_context": 202.2742, "num_word_doc": 49.7061, "num_word_query": 27.9325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3675.4121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4507, "query_norm": 1.6149, "queue_k_norm": 1.5635, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3171, "sent_len_1": 66.621, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0475, "stdk": 0.0482, "stdq": 0.043, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0041, "doc_norm": 1.5662, "encoder_q-embeddings": 3756.074, "encoder_q-layer.0": 2878.9927, "encoder_q-layer.1": 3184.3923, "encoder_q-layer.10": 1786.4843, "encoder_q-layer.11": 5191.7549, "encoder_q-layer.2": 3638.3711, "encoder_q-layer.3": 3839.3569, "encoder_q-layer.4": 4996.4253, "encoder_q-layer.5": 4588.4741, "encoder_q-layer.6": 5281.2549, "encoder_q-layer.7": 5240.3809, "encoder_q-layer.8": 4068.6277, "encoder_q-layer.9": 1713.8857, "epoch": 0.11, "inbatch_neg_score": 0.4693, "inbatch_pos_score": 1.0449, "learning_rate": 4.955555555555556e-05, "loss": 4.0041, "norm_diff": 0.0578, "norm_loss": 0.0, "num_token_doc": 66.7954, "num_token_overlap": 14.5841, "num_token_query": 37.367, "num_token_union": 65.396, "num_word_context": 202.207, "num_word_doc": 49.8319, "num_word_query": 27.9662, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5864.8961, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4666, "query_norm": 1.624, "queue_k_norm": 1.5656, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.367, "sent_len_1": 66.7954, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.1488, "stdk": 0.048, "stdq": 0.0435, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 4.0046, "doc_norm": 1.5738, "encoder_q-embeddings": 21722.2598, "encoder_q-layer.0": 19937.3594, "encoder_q-layer.1": 19218.4297, "encoder_q-layer.10": 2034.309, "encoder_q-layer.11": 5916.146, "encoder_q-layer.2": 20025.416, "encoder_q-layer.3": 19429.6211, "encoder_q-layer.4": 17519.6992, "encoder_q-layer.5": 13598.1621, "encoder_q-layer.6": 10163.2773, "encoder_q-layer.7": 8900.2432, "encoder_q-layer.8": 5356.689, "encoder_q-layer.9": 1890.1096, "epoch": 0.11, "inbatch_neg_score": 0.4884, "inbatch_pos_score": 1.0781, "learning_rate": 4.9500000000000004e-05, "loss": 4.0046, "norm_diff": 0.0804, "norm_loss": 0.0, "num_token_doc": 66.9869, "num_token_overlap": 14.5143, "num_token_query": 37.1318, "num_token_union": 65.458, "num_word_context": 202.5768, "num_word_doc": 49.9817, "num_word_query": 27.8, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22300.2569, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4851, "query_norm": 1.6542, "queue_k_norm": 1.5671, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1318, "sent_len_1": 66.9869, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.1163, "stdk": 0.0483, "stdq": 0.0437, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 4.0208, "doc_norm": 1.5684, "encoder_q-embeddings": 2261.8291, "encoder_q-layer.0": 1628.4873, "encoder_q-layer.1": 1887.9918, "encoder_q-layer.10": 1883.5596, "encoder_q-layer.11": 5860.0752, "encoder_q-layer.2": 2197.3198, "encoder_q-layer.3": 2357.7136, "encoder_q-layer.4": 2629.6975, "encoder_q-layer.5": 2392.9216, "encoder_q-layer.6": 2789.1272, "encoder_q-layer.7": 2604.7754, "encoder_q-layer.8": 2386.5095, "encoder_q-layer.9": 1499.5901, "epoch": 0.11, "inbatch_neg_score": 0.4624, "inbatch_pos_score": 1.0234, "learning_rate": 4.9444444444444446e-05, "loss": 4.0208, "norm_diff": 0.031, "norm_loss": 0.0, "num_token_doc": 66.8784, "num_token_overlap": 14.5708, "num_token_query": 37.2655, "num_token_union": 65.411, "num_word_context": 202.4886, "num_word_doc": 49.8294, "num_word_query": 27.8919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3988.3031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4573, "query_norm": 1.5994, "queue_k_norm": 1.5735, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2655, "sent_len_1": 66.8784, "sent_len_max_0": 127.9887, "sent_len_max_1": 191.23, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9943, "doc_norm": 1.5749, "encoder_q-embeddings": 2447.2898, "encoder_q-layer.0": 2212.5168, "encoder_q-layer.1": 2272.5283, "encoder_q-layer.10": 1881.5574, "encoder_q-layer.11": 6092.1548, "encoder_q-layer.2": 2517.0833, "encoder_q-layer.3": 2290.9792, "encoder_q-layer.4": 2310.4333, "encoder_q-layer.5": 2314.8638, "encoder_q-layer.6": 2479.0159, "encoder_q-layer.7": 2022.176, "encoder_q-layer.8": 1929.8748, "encoder_q-layer.9": 1452.5233, "epoch": 0.11, "inbatch_neg_score": 0.4915, "inbatch_pos_score": 1.0684, "learning_rate": 4.938888888888889e-05, "loss": 3.9943, "norm_diff": 0.034, "norm_loss": 0.0, "num_token_doc": 67.0296, "num_token_overlap": 14.6403, "num_token_query": 37.4225, "num_token_union": 65.5404, "num_word_context": 202.8841, "num_word_doc": 50.0021, "num_word_query": 28.0382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4040.5649, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4878, "query_norm": 1.6089, "queue_k_norm": 1.5772, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4225, "sent_len_1": 67.0296, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6625, "stdk": 0.0483, "stdq": 0.0431, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.0324, "doc_norm": 1.5874, "encoder_q-embeddings": 37489.1484, "encoder_q-layer.0": 28242.7891, "encoder_q-layer.1": 32136.8926, "encoder_q-layer.10": 1971.9629, "encoder_q-layer.11": 4721.8979, "encoder_q-layer.2": 41404.6602, "encoder_q-layer.3": 45736.4805, "encoder_q-layer.4": 53585.3867, "encoder_q-layer.5": 57713.1797, "encoder_q-layer.6": 45741.3945, "encoder_q-layer.7": 34280.0352, "encoder_q-layer.8": 17132.2988, "encoder_q-layer.9": 4810.7515, "epoch": 0.11, "inbatch_neg_score": 0.4165, "inbatch_pos_score": 0.9897, "learning_rate": 4.933333333333334e-05, "loss": 4.0324, "norm_diff": 0.0259, "norm_loss": 0.0, "num_token_doc": 66.6541, "num_token_overlap": 14.5817, "num_token_query": 37.3634, "num_token_union": 65.3183, "num_word_context": 202.2068, "num_word_doc": 49.6921, "num_word_query": 27.974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 54256.6957, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.4121, "query_norm": 1.5615, "queue_k_norm": 1.5851, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3634, "sent_len_1": 66.6541, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1225, "stdk": 0.0486, "stdq": 0.0424, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9996, "doc_norm": 1.5918, "encoder_q-embeddings": 3945.8311, "encoder_q-layer.0": 3124.6682, "encoder_q-layer.1": 3847.7383, "encoder_q-layer.10": 936.6466, "encoder_q-layer.11": 2231.4961, "encoder_q-layer.2": 4469.2808, "encoder_q-layer.3": 4545.6133, "encoder_q-layer.4": 5511.3799, "encoder_q-layer.5": 5060.252, "encoder_q-layer.6": 4321.999, "encoder_q-layer.7": 2852.8452, "encoder_q-layer.8": 1614.2213, "encoder_q-layer.9": 795.9369, "epoch": 0.11, "inbatch_neg_score": 0.4352, "inbatch_pos_score": 0.9995, "learning_rate": 4.927777777777778e-05, "loss": 3.9996, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.4687, "num_token_overlap": 14.5804, "num_token_query": 37.283, "num_token_union": 65.1699, "num_word_context": 202.0574, "num_word_doc": 49.605, "num_word_query": 27.9221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5469.3257, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4312, "query_norm": 1.5534, "queue_k_norm": 1.5911, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.283, "sent_len_1": 66.4687, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.5875, "stdk": 0.0486, "stdq": 0.0427, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9591, "doc_norm": 1.6, "encoder_q-embeddings": 1194.2152, "encoder_q-layer.0": 825.0165, "encoder_q-layer.1": 964.9481, "encoder_q-layer.10": 881.4336, "encoder_q-layer.11": 2223.7605, "encoder_q-layer.2": 1086.7653, "encoder_q-layer.3": 1113.8372, "encoder_q-layer.4": 1114.2397, "encoder_q-layer.5": 1257.3236, "encoder_q-layer.6": 1138.2166, "encoder_q-layer.7": 939.5537, "encoder_q-layer.8": 890.8521, "encoder_q-layer.9": 742.4966, "epoch": 0.11, "inbatch_neg_score": 0.4488, "inbatch_pos_score": 1.0332, "learning_rate": 4.922222222222222e-05, "loss": 3.9591, "norm_diff": 0.0346, "norm_loss": 0.0, "num_token_doc": 66.5994, "num_token_overlap": 14.5234, "num_token_query": 37.0649, "num_token_union": 65.1852, "num_word_context": 201.7922, "num_word_doc": 49.7532, "num_word_query": 27.7315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1748.4065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4456, "query_norm": 1.5654, "queue_k_norm": 1.5954, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0649, "sent_len_1": 66.5994, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0987, "stdk": 0.0488, "stdq": 0.0431, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.963, "doc_norm": 1.5981, "encoder_q-embeddings": 960.5465, "encoder_q-layer.0": 679.3436, "encoder_q-layer.1": 789.2286, "encoder_q-layer.10": 963.7819, "encoder_q-layer.11": 2273.9539, "encoder_q-layer.2": 891.3055, "encoder_q-layer.3": 931.3821, "encoder_q-layer.4": 904.33, "encoder_q-layer.5": 776.3511, "encoder_q-layer.6": 859.7037, "encoder_q-layer.7": 825.3693, "encoder_q-layer.8": 842.9001, "encoder_q-layer.9": 704.0209, "epoch": 0.11, "inbatch_neg_score": 0.4121, "inbatch_pos_score": 0.9858, "learning_rate": 4.9166666666666665e-05, "loss": 3.963, "norm_diff": 0.0459, "norm_loss": 0.0, "num_token_doc": 66.627, "num_token_overlap": 14.5404, "num_token_query": 37.2813, "num_token_union": 65.322, "num_word_context": 202.3221, "num_word_doc": 49.7225, "num_word_query": 27.8927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1500.1086, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4075, "query_norm": 1.5522, "queue_k_norm": 1.5965, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2813, "sent_len_1": 66.627, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7713, "stdk": 0.0487, "stdq": 0.0423, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.9505, "doc_norm": 1.6035, "encoder_q-embeddings": 2507.4316, "encoder_q-layer.0": 1696.9683, "encoder_q-layer.1": 1825.3434, "encoder_q-layer.10": 868.7401, "encoder_q-layer.11": 2187.5659, "encoder_q-layer.2": 2290.78, "encoder_q-layer.3": 2178.9783, "encoder_q-layer.4": 2244.0261, "encoder_q-layer.5": 2095.3596, "encoder_q-layer.6": 1936.8887, "encoder_q-layer.7": 1458.5649, "encoder_q-layer.8": 924.2915, "encoder_q-layer.9": 756.6083, "epoch": 0.11, "inbatch_neg_score": 0.4178, "inbatch_pos_score": 0.9692, "learning_rate": 4.9111111111111114e-05, "loss": 3.9505, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.8807, "num_token_overlap": 14.552, "num_token_query": 37.261, "num_token_union": 65.466, "num_word_context": 202.6839, "num_word_doc": 49.9797, "num_word_query": 27.8824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2811.1055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4158, "query_norm": 1.5322, "queue_k_norm": 1.6008, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.261, "sent_len_1": 66.8807, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8113, "stdk": 0.0488, "stdq": 0.0422, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9244, "doc_norm": 1.6048, "encoder_q-embeddings": 1215.683, "encoder_q-layer.0": 782.4588, "encoder_q-layer.1": 790.5199, "encoder_q-layer.10": 923.7299, "encoder_q-layer.11": 2259.8506, "encoder_q-layer.2": 884.9316, "encoder_q-layer.3": 973.637, "encoder_q-layer.4": 1062.7086, "encoder_q-layer.5": 1074.9462, "encoder_q-layer.6": 1187.5082, "encoder_q-layer.7": 1178.2419, "encoder_q-layer.8": 941.1404, "encoder_q-layer.9": 795.8862, "epoch": 0.11, "inbatch_neg_score": 0.4166, "inbatch_pos_score": 0.9995, "learning_rate": 4.905555555555556e-05, "loss": 3.9244, "norm_diff": 0.0256, "norm_loss": 0.0, "num_token_doc": 67.0309, "num_token_overlap": 14.6412, "num_token_query": 37.4034, "num_token_union": 65.5347, "num_word_context": 202.6239, "num_word_doc": 50.0332, "num_word_query": 28.013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1677.2845, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4116, "query_norm": 1.5792, "queue_k_norm": 1.5997, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4034, "sent_len_1": 67.0309, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1562, "stdk": 0.0488, "stdq": 0.0437, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.9235, "doc_norm": 1.5941, "encoder_q-embeddings": 13929.6641, "encoder_q-layer.0": 13178.1182, "encoder_q-layer.1": 14577.75, "encoder_q-layer.10": 936.9346, "encoder_q-layer.11": 2026.3065, "encoder_q-layer.2": 16526.9277, "encoder_q-layer.3": 15471.8203, "encoder_q-layer.4": 19308.4746, "encoder_q-layer.5": 16401.5781, "encoder_q-layer.6": 11052.2803, "encoder_q-layer.7": 5426.1572, "encoder_q-layer.8": 3634.5923, "encoder_q-layer.9": 1280.1892, "epoch": 0.12, "inbatch_neg_score": 0.3946, "inbatch_pos_score": 1.0049, "learning_rate": 4.9e-05, "loss": 3.9235, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.5172, "num_token_overlap": 14.5591, "num_token_query": 37.4113, "num_token_union": 65.2888, "num_word_context": 202.1649, "num_word_doc": 49.6183, "num_word_query": 27.9995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18508.1436, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3916, "query_norm": 1.5861, "queue_k_norm": 1.5945, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4113, "sent_len_1": 66.5172, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.27, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.9351, "doc_norm": 1.5956, "encoder_q-embeddings": 411.512, "encoder_q-layer.0": 301.2674, "encoder_q-layer.1": 345.4763, "encoder_q-layer.10": 383.2256, "encoder_q-layer.11": 1039.0344, "encoder_q-layer.2": 391.9719, "encoder_q-layer.3": 390.3736, "encoder_q-layer.4": 443.5029, "encoder_q-layer.5": 412.6932, "encoder_q-layer.6": 419.3969, "encoder_q-layer.7": 373.7132, "encoder_q-layer.8": 391.0996, "encoder_q-layer.9": 330.5078, "epoch": 0.12, "inbatch_neg_score": 0.3637, "inbatch_pos_score": 0.9434, "learning_rate": 4.894444444444445e-05, "loss": 3.9351, "norm_diff": 0.0859, "norm_loss": 0.0, "num_token_doc": 66.802, "num_token_overlap": 14.5863, "num_token_query": 37.3119, "num_token_union": 65.4109, "num_word_context": 202.3304, "num_word_doc": 49.8271, "num_word_query": 27.9296, "postclip_grad_norm": 1.0, "preclip_grad_norm": 695.9949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3611, "query_norm": 1.5097, "queue_k_norm": 1.5915, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3119, "sent_len_1": 66.802, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7962, "stdk": 0.0486, "stdq": 0.0416, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.9229, "doc_norm": 1.5892, "encoder_q-embeddings": 1127.9547, "encoder_q-layer.0": 777.7877, "encoder_q-layer.1": 905.231, "encoder_q-layer.10": 441.2637, "encoder_q-layer.11": 1308.3982, "encoder_q-layer.2": 1170.921, "encoder_q-layer.3": 1232.7324, "encoder_q-layer.4": 1171.6576, "encoder_q-layer.5": 1201.349, "encoder_q-layer.6": 1285.5281, "encoder_q-layer.7": 1090.5691, "encoder_q-layer.8": 852.6761, "encoder_q-layer.9": 414.9767, "epoch": 0.12, "inbatch_neg_score": 0.346, "inbatch_pos_score": 0.937, "learning_rate": 4.888888888888889e-05, "loss": 3.9229, "norm_diff": 0.0179, "norm_loss": 0.0, "num_token_doc": 66.8596, "num_token_overlap": 14.6026, "num_token_query": 37.2588, "num_token_union": 65.393, "num_word_context": 202.4239, "num_word_doc": 49.8563, "num_word_query": 27.8816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1522.8562, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3418, "query_norm": 1.5753, "queue_k_norm": 1.5888, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2588, "sent_len_1": 66.8596, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.9675, "stdk": 0.0484, "stdq": 0.0437, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.9171, "doc_norm": 1.5864, "encoder_q-embeddings": 991.2058, "encoder_q-layer.0": 887.7039, "encoder_q-layer.1": 1073.8871, "encoder_q-layer.10": 446.8472, "encoder_q-layer.11": 1056.1505, "encoder_q-layer.2": 1299.5554, "encoder_q-layer.3": 1076.0505, "encoder_q-layer.4": 992.5078, "encoder_q-layer.5": 880.5903, "encoder_q-layer.6": 958.3864, "encoder_q-layer.7": 805.1418, "encoder_q-layer.8": 949.9044, "encoder_q-layer.9": 384.2587, "epoch": 0.12, "inbatch_neg_score": 0.3355, "inbatch_pos_score": 0.9453, "learning_rate": 4.883333333333334e-05, "loss": 3.9171, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.7788, "num_token_overlap": 14.5539, "num_token_query": 37.1728, "num_token_union": 65.3439, "num_word_context": 202.1058, "num_word_doc": 49.8527, "num_word_query": 27.8251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1423.8228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.333, "query_norm": 1.5762, "queue_k_norm": 1.5895, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1728, "sent_len_1": 66.7788, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7025, "stdk": 0.0483, "stdq": 0.0433, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.9183, "doc_norm": 1.5851, "encoder_q-embeddings": 29858.0742, "encoder_q-layer.0": 20694.7969, "encoder_q-layer.1": 19939.1875, "encoder_q-layer.10": 453.2264, "encoder_q-layer.11": 999.2375, "encoder_q-layer.2": 24960.2676, "encoder_q-layer.3": 19889.3281, "encoder_q-layer.4": 19025.25, "encoder_q-layer.5": 17667.5781, "encoder_q-layer.6": 13204.0322, "encoder_q-layer.7": 10791.6074, "encoder_q-layer.8": 6814.2593, "encoder_q-layer.9": 1075.072, "epoch": 0.12, "inbatch_neg_score": 0.3257, "inbatch_pos_score": 0.8813, "learning_rate": 4.8777777777777775e-05, "loss": 3.9183, "norm_diff": 0.0197, "norm_loss": 0.0, "num_token_doc": 66.7413, "num_token_overlap": 14.6425, "num_token_query": 37.3614, "num_token_union": 65.3004, "num_word_context": 202.1172, "num_word_doc": 49.7319, "num_word_query": 27.9665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26691.2086, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3228, "query_norm": 1.5675, "queue_k_norm": 1.5879, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3614, "sent_len_1": 66.7413, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.045, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.9119, "doc_norm": 1.5783, "encoder_q-embeddings": 1209.4707, "encoder_q-layer.0": 932.759, "encoder_q-layer.1": 1102.8992, "encoder_q-layer.10": 438.9497, "encoder_q-layer.11": 1346.3098, "encoder_q-layer.2": 1069.9738, "encoder_q-layer.3": 1136.6895, "encoder_q-layer.4": 1303.2906, "encoder_q-layer.5": 1253.0333, "encoder_q-layer.6": 1472.7915, "encoder_q-layer.7": 1459.3718, "encoder_q-layer.8": 972.5214, "encoder_q-layer.9": 393.7846, "epoch": 0.12, "inbatch_neg_score": 0.3737, "inbatch_pos_score": 0.9707, "learning_rate": 4.8722222222222224e-05, "loss": 3.9119, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.8846, "num_token_overlap": 14.5865, "num_token_query": 37.3289, "num_token_union": 65.4589, "num_word_context": 202.1992, "num_word_doc": 49.9217, "num_word_query": 27.9351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1685.6408, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3696, "query_norm": 1.6063, "queue_k_norm": 1.5782, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3289, "sent_len_1": 66.8846, "sent_len_max_0": 128.0, "sent_len_max_1": 188.835, "stdk": 0.0483, "stdq": 0.0438, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.8866, "doc_norm": 1.5644, "encoder_q-embeddings": 37645.5938, "encoder_q-layer.0": 28367.623, "encoder_q-layer.1": 35184.5664, "encoder_q-layer.10": 576.7056, "encoder_q-layer.11": 1323.1102, "encoder_q-layer.2": 42982.5234, "encoder_q-layer.3": 46281.7109, "encoder_q-layer.4": 43505.3398, "encoder_q-layer.5": 40531.7734, "encoder_q-layer.6": 34867.2695, "encoder_q-layer.7": 28394.1426, "encoder_q-layer.8": 14723.3994, "encoder_q-layer.9": 2317.8225, "epoch": 0.12, "inbatch_neg_score": 0.3556, "inbatch_pos_score": 0.9131, "learning_rate": 4.866666666666667e-05, "loss": 3.8866, "norm_diff": 0.0319, "norm_loss": 0.0, "num_token_doc": 66.8156, "num_token_overlap": 14.6418, "num_token_query": 37.569, "num_token_union": 65.4909, "num_word_context": 202.4811, "num_word_doc": 49.885, "num_word_query": 28.1505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 48091.7385, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.3511, "query_norm": 1.5963, "queue_k_norm": 1.5672, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.569, "sent_len_1": 66.8156, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0525, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.9496, "doc_norm": 1.5509, "encoder_q-embeddings": 1755.0037, "encoder_q-layer.0": 1257.1947, "encoder_q-layer.1": 1306.6429, "encoder_q-layer.10": 392.6157, "encoder_q-layer.11": 1050.1404, "encoder_q-layer.2": 1558.9473, "encoder_q-layer.3": 1730.3485, "encoder_q-layer.4": 1850.7623, "encoder_q-layer.5": 1705.9084, "encoder_q-layer.6": 1657.5614, "encoder_q-layer.7": 1406.922, "encoder_q-layer.8": 962.7623, "encoder_q-layer.9": 360.1285, "epoch": 0.12, "inbatch_neg_score": 0.3691, "inbatch_pos_score": 0.9556, "learning_rate": 4.8611111111111115e-05, "loss": 3.9496, "norm_diff": 0.0598, "norm_loss": 0.0, "num_token_doc": 66.7383, "num_token_overlap": 14.5064, "num_token_query": 37.1696, "num_token_union": 65.3299, "num_word_context": 202.4892, "num_word_doc": 49.7917, "num_word_query": 27.8347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2112.186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3657, "query_norm": 1.6107, "queue_k_norm": 1.5551, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1696, "sent_len_1": 66.7383, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.2887, "stdk": 0.0478, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9111, "doc_norm": 1.5457, "encoder_q-embeddings": 66070.625, "encoder_q-layer.0": 49732.3984, "encoder_q-layer.1": 49577.5703, "encoder_q-layer.10": 590.4841, "encoder_q-layer.11": 1071.7186, "encoder_q-layer.2": 56151.4727, "encoder_q-layer.3": 63969.6172, "encoder_q-layer.4": 60173.7031, "encoder_q-layer.5": 52159.9141, "encoder_q-layer.6": 42589.7539, "encoder_q-layer.7": 32049.0391, "encoder_q-layer.8": 29352.8301, "encoder_q-layer.9": 4297.6406, "epoch": 0.12, "inbatch_neg_score": 0.3466, "inbatch_pos_score": 0.9121, "learning_rate": 4.855555555555556e-05, "loss": 3.9111, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.7905, "num_token_overlap": 14.5467, "num_token_query": 37.227, "num_token_union": 65.3375, "num_word_context": 201.9479, "num_word_doc": 49.7837, "num_word_query": 27.8643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 70630.3233, "preclip_grad_norm_avg": 0.0006, "q@queue_neg_score": 0.3442, "query_norm": 1.5614, "queue_k_norm": 1.5424, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.227, "sent_len_1": 66.7905, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.7775, "stdk": 0.0479, "stdq": 0.0417, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.9551, "doc_norm": 1.5306, "encoder_q-embeddings": 1890.2969, "encoder_q-layer.0": 1375.0369, "encoder_q-layer.1": 1389.0649, "encoder_q-layer.10": 429.3307, "encoder_q-layer.11": 1219.5111, "encoder_q-layer.2": 1475.9039, "encoder_q-layer.3": 1449.0969, "encoder_q-layer.4": 1584.5618, "encoder_q-layer.5": 1485.2555, "encoder_q-layer.6": 1640.9329, "encoder_q-layer.7": 1336.3525, "encoder_q-layer.8": 988.108, "encoder_q-layer.9": 410.2833, "epoch": 0.12, "inbatch_neg_score": 0.3563, "inbatch_pos_score": 0.9248, "learning_rate": 4.85e-05, "loss": 3.9551, "norm_diff": 0.0289, "norm_loss": 0.0, "num_token_doc": 66.9658, "num_token_overlap": 14.6021, "num_token_query": 37.2898, "num_token_union": 65.4638, "num_word_context": 202.4841, "num_word_doc": 49.9711, "num_word_query": 27.9198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2059.884, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.355, "query_norm": 1.5595, "queue_k_norm": 1.53, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2898, "sent_len_1": 66.9658, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6163, "stdk": 0.0476, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9187, "doc_norm": 1.5138, "encoder_q-embeddings": 1063.6802, "encoder_q-layer.0": 762.486, "encoder_q-layer.1": 844.254, "encoder_q-layer.10": 389.8652, "encoder_q-layer.11": 1161.3546, "encoder_q-layer.2": 985.38, "encoder_q-layer.3": 1075.0458, "encoder_q-layer.4": 1221.7258, "encoder_q-layer.5": 1167.4027, "encoder_q-layer.6": 1121.5079, "encoder_q-layer.7": 971.491, "encoder_q-layer.8": 688.1264, "encoder_q-layer.9": 350.1623, "epoch": 0.12, "inbatch_neg_score": 0.3643, "inbatch_pos_score": 0.9443, "learning_rate": 4.844444444444445e-05, "loss": 3.9187, "norm_diff": 0.0653, "norm_loss": 0.0, "num_token_doc": 66.8188, "num_token_overlap": 14.6516, "num_token_query": 37.5353, "num_token_union": 65.4881, "num_word_context": 202.3353, "num_word_doc": 49.8621, "num_word_query": 28.1198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1418.3812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3601, "query_norm": 1.5791, "queue_k_norm": 1.5201, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5353, "sent_len_1": 66.8188, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1775, "stdk": 0.0473, "stdq": 0.0429, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.909, "doc_norm": 1.5038, "encoder_q-embeddings": 2949.3428, "encoder_q-layer.0": 2211.6936, "encoder_q-layer.1": 2329.5811, "encoder_q-layer.10": 438.9591, "encoder_q-layer.11": 1230.5814, "encoder_q-layer.2": 2404.2251, "encoder_q-layer.3": 2462.1201, "encoder_q-layer.4": 2272.9207, "encoder_q-layer.5": 1934.4706, "encoder_q-layer.6": 1859.5115, "encoder_q-layer.7": 2079.2178, "encoder_q-layer.8": 1303.9921, "encoder_q-layer.9": 446.5851, "epoch": 0.13, "inbatch_neg_score": 0.336, "inbatch_pos_score": 0.9116, "learning_rate": 4.838888888888889e-05, "loss": 3.909, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.8018, "num_token_overlap": 14.5981, "num_token_query": 37.4051, "num_token_union": 65.3792, "num_word_context": 202.1872, "num_word_doc": 49.8328, "num_word_query": 27.9901, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3035.9256, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3328, "query_norm": 1.565, "queue_k_norm": 1.5087, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4051, "sent_len_1": 66.8018, "sent_len_max_0": 127.99, "sent_len_max_1": 188.82, "stdk": 0.0472, "stdq": 0.0429, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9116, "doc_norm": 1.4945, "encoder_q-embeddings": 840.3642, "encoder_q-layer.0": 588.383, "encoder_q-layer.1": 661.6956, "encoder_q-layer.10": 410.8098, "encoder_q-layer.11": 1120.24, "encoder_q-layer.2": 677.7241, "encoder_q-layer.3": 710.7976, "encoder_q-layer.4": 791.9614, "encoder_q-layer.5": 800.6518, "encoder_q-layer.6": 854.0451, "encoder_q-layer.7": 829.2392, "encoder_q-layer.8": 594.8271, "encoder_q-layer.9": 364.7046, "epoch": 0.13, "inbatch_neg_score": 0.3038, "inbatch_pos_score": 0.8721, "learning_rate": 4.8333333333333334e-05, "loss": 3.9116, "norm_diff": 0.0608, "norm_loss": 0.0, "num_token_doc": 66.7952, "num_token_overlap": 14.5395, "num_token_query": 37.2305, "num_token_union": 65.363, "num_word_context": 202.0279, "num_word_doc": 49.8136, "num_word_query": 27.8447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1120.0837, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.5553, "queue_k_norm": 1.4986, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2305, "sent_len_1": 66.7952, "sent_len_max_0": 127.985, "sent_len_max_1": 190.7887, "stdk": 0.0471, "stdq": 0.0431, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8893, "doc_norm": 1.4908, "encoder_q-embeddings": 1335.1769, "encoder_q-layer.0": 975.0187, "encoder_q-layer.1": 1120.5837, "encoder_q-layer.10": 496.1234, "encoder_q-layer.11": 1133.9163, "encoder_q-layer.2": 1419.3966, "encoder_q-layer.3": 1365.0371, "encoder_q-layer.4": 1464.8837, "encoder_q-layer.5": 1414.9969, "encoder_q-layer.6": 1473.6744, "encoder_q-layer.7": 1013.5506, "encoder_q-layer.8": 905.3033, "encoder_q-layer.9": 453.3957, "epoch": 0.13, "inbatch_neg_score": 0.2693, "inbatch_pos_score": 0.8472, "learning_rate": 4.8277777777777776e-05, "loss": 3.8893, "norm_diff": 0.1367, "norm_loss": 0.0, "num_token_doc": 66.8064, "num_token_overlap": 14.6009, "num_token_query": 37.3276, "num_token_union": 65.3354, "num_word_context": 202.4363, "num_word_doc": 49.8694, "num_word_query": 27.9233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1738.7601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2661, "query_norm": 1.6275, "queue_k_norm": 1.4949, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3276, "sent_len_1": 66.8064, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0213, "stdk": 0.0472, "stdq": 0.0451, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9134, "doc_norm": 1.491, "encoder_q-embeddings": 412.8035, "encoder_q-layer.0": 315.3855, "encoder_q-layer.1": 334.3739, "encoder_q-layer.10": 201.9866, "encoder_q-layer.11": 458.1177, "encoder_q-layer.2": 394.7128, "encoder_q-layer.3": 398.9913, "encoder_q-layer.4": 410.9679, "encoder_q-layer.5": 389.2823, "encoder_q-layer.6": 431.7487, "encoder_q-layer.7": 360.7096, "encoder_q-layer.8": 333.0128, "encoder_q-layer.9": 181.9681, "epoch": 0.13, "inbatch_neg_score": 0.2162, "inbatch_pos_score": 0.8076, "learning_rate": 4.8222222222222225e-05, "loss": 3.9134, "norm_diff": 0.1577, "norm_loss": 0.0, "num_token_doc": 66.7819, "num_token_overlap": 14.5859, "num_token_query": 37.2598, "num_token_union": 65.375, "num_word_context": 202.3962, "num_word_doc": 49.8766, "num_word_query": 27.9196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 535.1346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2141, "query_norm": 1.6487, "queue_k_norm": 1.4891, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2598, "sent_len_1": 66.7819, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5475, "stdk": 0.0475, "stdq": 0.0446, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9112, "doc_norm": 1.4853, "encoder_q-embeddings": 444.0283, "encoder_q-layer.0": 318.0432, "encoder_q-layer.1": 361.7171, "encoder_q-layer.10": 228.8218, "encoder_q-layer.11": 513.8472, "encoder_q-layer.2": 403.817, "encoder_q-layer.3": 456.116, "encoder_q-layer.4": 498.6218, "encoder_q-layer.5": 478.9005, "encoder_q-layer.6": 482.5121, "encoder_q-layer.7": 411.6509, "encoder_q-layer.8": 362.2313, "encoder_q-layer.9": 210.3659, "epoch": 0.13, "inbatch_neg_score": 0.2785, "inbatch_pos_score": 0.8633, "learning_rate": 4.8166666666666674e-05, "loss": 3.9112, "norm_diff": 0.098, "norm_loss": 0.0, "num_token_doc": 66.7718, "num_token_overlap": 14.5193, "num_token_query": 37.2515, "num_token_union": 65.3601, "num_word_context": 202.273, "num_word_doc": 49.8287, "num_word_query": 27.8806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 603.5869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2754, "query_norm": 1.5832, "queue_k_norm": 1.4835, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2515, "sent_len_1": 66.7718, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1725, "stdk": 0.0475, "stdq": 0.0447, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9026, "doc_norm": 1.4733, "encoder_q-embeddings": 502.2665, "encoder_q-layer.0": 379.0663, "encoder_q-layer.1": 376.4205, "encoder_q-layer.10": 221.4889, "encoder_q-layer.11": 512.3356, "encoder_q-layer.2": 456.4826, "encoder_q-layer.3": 496.1473, "encoder_q-layer.4": 533.9751, "encoder_q-layer.5": 480.2036, "encoder_q-layer.6": 519.7219, "encoder_q-layer.7": 480.4012, "encoder_q-layer.8": 460.4959, "encoder_q-layer.9": 213.6161, "epoch": 0.13, "inbatch_neg_score": 0.2438, "inbatch_pos_score": 0.8091, "learning_rate": 4.811111111111111e-05, "loss": 3.9026, "norm_diff": 0.1657, "norm_loss": 0.0, "num_token_doc": 66.6717, "num_token_overlap": 14.6573, "num_token_query": 37.3694, "num_token_union": 65.3048, "num_word_context": 202.3065, "num_word_doc": 49.7874, "num_word_query": 27.9761, "postclip_grad_norm": 1.0, "preclip_grad_norm": 665.5339, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2399, "query_norm": 1.6389, "queue_k_norm": 1.4761, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3694, "sent_len_1": 66.6717, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6788, "stdk": 0.0473, "stdq": 0.0428, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.9165, "doc_norm": 1.467, "encoder_q-embeddings": 1256.0154, "encoder_q-layer.0": 923.4374, "encoder_q-layer.1": 1017.2027, "encoder_q-layer.10": 199.5599, "encoder_q-layer.11": 583.5364, "encoder_q-layer.2": 1098.1167, "encoder_q-layer.3": 1106.9282, "encoder_q-layer.4": 1082.2036, "encoder_q-layer.5": 894.0294, "encoder_q-layer.6": 911.525, "encoder_q-layer.7": 810.3495, "encoder_q-layer.8": 581.2156, "encoder_q-layer.9": 206.8901, "epoch": 0.13, "inbatch_neg_score": 0.2626, "inbatch_pos_score": 0.8403, "learning_rate": 4.805555555555556e-05, "loss": 3.9165, "norm_diff": 0.1186, "norm_loss": 0.0, "num_token_doc": 66.841, "num_token_overlap": 14.5764, "num_token_query": 37.2553, "num_token_union": 65.3149, "num_word_context": 202.0151, "num_word_doc": 49.8673, "num_word_query": 27.8735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1340.1309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.259, "query_norm": 1.5856, "queue_k_norm": 1.4716, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2553, "sent_len_1": 66.841, "sent_len_max_0": 127.995, "sent_len_max_1": 191.21, "stdk": 0.0472, "stdq": 0.0441, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.88, "doc_norm": 1.4722, "encoder_q-embeddings": 303.8983, "encoder_q-layer.0": 206.2369, "encoder_q-layer.1": 224.1565, "encoder_q-layer.10": 221.6894, "encoder_q-layer.11": 501.6223, "encoder_q-layer.2": 247.7545, "encoder_q-layer.3": 255.8148, "encoder_q-layer.4": 272.1188, "encoder_q-layer.5": 257.4688, "encoder_q-layer.6": 239.8875, "encoder_q-layer.7": 224.7662, "encoder_q-layer.8": 229.9079, "encoder_q-layer.9": 186.7445, "epoch": 0.13, "inbatch_neg_score": 0.275, "inbatch_pos_score": 0.8604, "learning_rate": 4.8e-05, "loss": 3.88, "norm_diff": 0.0969, "norm_loss": 0.0, "num_token_doc": 66.866, "num_token_overlap": 14.6654, "num_token_query": 37.6178, "num_token_union": 65.501, "num_word_context": 202.6684, "num_word_doc": 49.8894, "num_word_query": 28.1909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 405.6356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2734, "query_norm": 1.5691, "queue_k_norm": 1.4645, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.6178, "sent_len_1": 66.866, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.495, "stdk": 0.0475, "stdq": 0.0427, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9139, "doc_norm": 1.4529, "encoder_q-embeddings": 1308.264, "encoder_q-layer.0": 922.0652, "encoder_q-layer.1": 847.0453, "encoder_q-layer.10": 199.214, "encoder_q-layer.11": 542.9327, "encoder_q-layer.2": 886.9715, "encoder_q-layer.3": 816.7667, "encoder_q-layer.4": 795.9622, "encoder_q-layer.5": 791.9976, "encoder_q-layer.6": 685.9692, "encoder_q-layer.7": 564.7557, "encoder_q-layer.8": 472.8262, "encoder_q-layer.9": 178.1671, "epoch": 0.13, "inbatch_neg_score": 0.2741, "inbatch_pos_score": 0.8271, "learning_rate": 4.794444444444445e-05, "loss": 3.9139, "norm_diff": 0.1041, "norm_loss": 0.0, "num_token_doc": 66.6751, "num_token_overlap": 14.5483, "num_token_query": 37.3363, "num_token_union": 65.3614, "num_word_context": 202.2567, "num_word_doc": 49.7459, "num_word_query": 27.972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1194.4032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2698, "query_norm": 1.557, "queue_k_norm": 1.4586, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3363, "sent_len_1": 66.6751, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2875, "stdk": 0.0471, "stdq": 0.0429, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.877, "doc_norm": 1.45, "encoder_q-embeddings": 1012.8815, "encoder_q-layer.0": 615.6494, "encoder_q-layer.1": 624.4318, "encoder_q-layer.10": 211.9543, "encoder_q-layer.11": 527.2597, "encoder_q-layer.2": 679.3842, "encoder_q-layer.3": 713.512, "encoder_q-layer.4": 718.0286, "encoder_q-layer.5": 615.2223, "encoder_q-layer.6": 509.9846, "encoder_q-layer.7": 418.1698, "encoder_q-layer.8": 298.2925, "encoder_q-layer.9": 184.2392, "epoch": 0.13, "inbatch_neg_score": 0.2718, "inbatch_pos_score": 0.8208, "learning_rate": 4.7888888888888886e-05, "loss": 3.877, "norm_diff": 0.1006, "norm_loss": 0.0, "num_token_doc": 66.7857, "num_token_overlap": 14.6235, "num_token_query": 37.3548, "num_token_union": 65.4472, "num_word_context": 202.2335, "num_word_doc": 49.8575, "num_word_query": 27.9725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 930.9528, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2678, "query_norm": 1.5506, "queue_k_norm": 1.4524, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3548, "sent_len_1": 66.7857, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.4187, "stdk": 0.0471, "stdq": 0.043, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.9224, "doc_norm": 1.4513, "encoder_q-embeddings": 460.2399, "encoder_q-layer.0": 346.6009, "encoder_q-layer.1": 389.2789, "encoder_q-layer.10": 219.1969, "encoder_q-layer.11": 508.3001, "encoder_q-layer.2": 461.5854, "encoder_q-layer.3": 448.5297, "encoder_q-layer.4": 488.4307, "encoder_q-layer.5": 461.2317, "encoder_q-layer.6": 410.1031, "encoder_q-layer.7": 310.7312, "encoder_q-layer.8": 309.3159, "encoder_q-layer.9": 191.6581, "epoch": 0.14, "inbatch_neg_score": 0.2397, "inbatch_pos_score": 0.8184, "learning_rate": 4.7833333333333335e-05, "loss": 3.9224, "norm_diff": 0.2264, "norm_loss": 0.0, "num_token_doc": 66.7388, "num_token_overlap": 14.6158, "num_token_query": 37.4176, "num_token_union": 65.327, "num_word_context": 202.2343, "num_word_doc": 49.7586, "num_word_query": 28.0204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 584.9508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2351, "query_norm": 1.6777, "queue_k_norm": 1.4472, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4176, "sent_len_1": 66.7388, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7825, "stdk": 0.0473, "stdq": 0.0446, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9267, "doc_norm": 1.4458, "encoder_q-embeddings": 479.1082, "encoder_q-layer.0": 323.3647, "encoder_q-layer.1": 362.9725, "encoder_q-layer.10": 99.0208, "encoder_q-layer.11": 246.6045, "encoder_q-layer.2": 436.787, "encoder_q-layer.3": 469.5632, "encoder_q-layer.4": 556.5691, "encoder_q-layer.5": 515.076, "encoder_q-layer.6": 299.6098, "encoder_q-layer.7": 247.566, "encoder_q-layer.8": 209.0386, "encoder_q-layer.9": 101.8001, "epoch": 0.14, "inbatch_neg_score": 0.2281, "inbatch_pos_score": 0.8027, "learning_rate": 4.7777777777777784e-05, "loss": 3.9267, "norm_diff": 0.1416, "norm_loss": 0.0, "num_token_doc": 66.7179, "num_token_overlap": 14.5424, "num_token_query": 37.2863, "num_token_union": 65.3906, "num_word_context": 202.3438, "num_word_doc": 49.7848, "num_word_query": 27.9129, "postclip_grad_norm": 1.0, "preclip_grad_norm": 544.9127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2255, "query_norm": 1.5873, "queue_k_norm": 1.4433, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2863, "sent_len_1": 66.7179, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3625, "stdk": 0.0473, "stdq": 0.0433, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.887, "doc_norm": 1.4398, "encoder_q-embeddings": 895.6064, "encoder_q-layer.0": 613.2038, "encoder_q-layer.1": 654.5564, "encoder_q-layer.10": 111.4055, "encoder_q-layer.11": 267.9857, "encoder_q-layer.2": 754.2639, "encoder_q-layer.3": 830.8502, "encoder_q-layer.4": 899.9491, "encoder_q-layer.5": 900.1544, "encoder_q-layer.6": 964.0807, "encoder_q-layer.7": 986.9274, "encoder_q-layer.8": 595.5225, "encoder_q-layer.9": 183.5081, "epoch": 0.14, "inbatch_neg_score": 0.2609, "inbatch_pos_score": 0.8345, "learning_rate": 4.7722222222222226e-05, "loss": 3.887, "norm_diff": 0.118, "norm_loss": 0.0, "num_token_doc": 66.7125, "num_token_overlap": 14.5831, "num_token_query": 37.2615, "num_token_union": 65.2953, "num_word_context": 201.9911, "num_word_doc": 49.7656, "num_word_query": 27.9077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1079.4473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2573, "query_norm": 1.5578, "queue_k_norm": 1.4392, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2615, "sent_len_1": 66.7125, "sent_len_max_0": 127.985, "sent_len_max_1": 190.2862, "stdk": 0.0471, "stdq": 0.0441, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8797, "doc_norm": 1.4323, "encoder_q-embeddings": 215.696, "encoder_q-layer.0": 148.3789, "encoder_q-layer.1": 175.6226, "encoder_q-layer.10": 105.0552, "encoder_q-layer.11": 279.9843, "encoder_q-layer.2": 186.7114, "encoder_q-layer.3": 188.5297, "encoder_q-layer.4": 199.1911, "encoder_q-layer.5": 194.6065, "encoder_q-layer.6": 203.8098, "encoder_q-layer.7": 196.1853, "encoder_q-layer.8": 222.8658, "encoder_q-layer.9": 134.1696, "epoch": 0.14, "inbatch_neg_score": 0.2622, "inbatch_pos_score": 0.8418, "learning_rate": 4.766666666666667e-05, "loss": 3.8797, "norm_diff": 0.1636, "norm_loss": 0.0, "num_token_doc": 66.6324, "num_token_overlap": 14.6215, "num_token_query": 37.4083, "num_token_union": 65.3389, "num_word_context": 201.8858, "num_word_doc": 49.6896, "num_word_query": 27.9835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 290.1207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.5958, "queue_k_norm": 1.4358, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4083, "sent_len_1": 66.6324, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.3137, "stdk": 0.047, "stdq": 0.0444, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.872, "doc_norm": 1.4333, "encoder_q-embeddings": 328.6235, "encoder_q-layer.0": 223.6714, "encoder_q-layer.1": 239.6843, "encoder_q-layer.10": 95.0392, "encoder_q-layer.11": 256.9527, "encoder_q-layer.2": 273.8526, "encoder_q-layer.3": 313.6964, "encoder_q-layer.4": 359.7258, "encoder_q-layer.5": 349.4817, "encoder_q-layer.6": 304.7842, "encoder_q-layer.7": 270.9828, "encoder_q-layer.8": 188.7227, "encoder_q-layer.9": 102.6937, "epoch": 0.14, "inbatch_neg_score": 0.2964, "inbatch_pos_score": 0.8975, "learning_rate": 4.761111111111111e-05, "loss": 3.872, "norm_diff": 0.1857, "norm_loss": 0.0, "num_token_doc": 66.7913, "num_token_overlap": 14.566, "num_token_query": 37.3589, "num_token_union": 65.4204, "num_word_context": 202.4956, "num_word_doc": 49.8699, "num_word_query": 27.9826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 403.4146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.619, "queue_k_norm": 1.4346, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3589, "sent_len_1": 66.7913, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0662, "stdk": 0.0471, "stdq": 0.0447, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.897, "doc_norm": 1.4337, "encoder_q-embeddings": 257.9095, "encoder_q-layer.0": 190.3152, "encoder_q-layer.1": 215.5729, "encoder_q-layer.10": 95.7542, "encoder_q-layer.11": 257.1107, "encoder_q-layer.2": 235.3404, "encoder_q-layer.3": 253.5561, "encoder_q-layer.4": 252.7196, "encoder_q-layer.5": 265.7777, "encoder_q-layer.6": 234.6426, "encoder_q-layer.7": 192.9688, "encoder_q-layer.8": 146.0448, "encoder_q-layer.9": 93.1148, "epoch": 0.14, "inbatch_neg_score": 0.3029, "inbatch_pos_score": 0.8901, "learning_rate": 4.755555555555556e-05, "loss": 3.897, "norm_diff": 0.1786, "norm_loss": 0.0, "num_token_doc": 67.0175, "num_token_overlap": 14.634, "num_token_query": 37.1758, "num_token_union": 65.4007, "num_word_context": 202.5397, "num_word_doc": 50.0047, "num_word_query": 27.8252, "postclip_grad_norm": 1.0, "preclip_grad_norm": 320.2567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2996, "query_norm": 1.6123, "queue_k_norm": 1.435, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1758, "sent_len_1": 67.0175, "sent_len_max_0": 128.0, "sent_len_max_1": 189.545, "stdk": 0.0472, "stdq": 0.0445, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8923, "doc_norm": 1.431, "encoder_q-embeddings": 419.9131, "encoder_q-layer.0": 280.6837, "encoder_q-layer.1": 307.4688, "encoder_q-layer.10": 95.5048, "encoder_q-layer.11": 263.3532, "encoder_q-layer.2": 343.0616, "encoder_q-layer.3": 345.3404, "encoder_q-layer.4": 357.0111, "encoder_q-layer.5": 278.4384, "encoder_q-layer.6": 255.99, "encoder_q-layer.7": 214.8671, "encoder_q-layer.8": 154.3376, "encoder_q-layer.9": 91.3431, "epoch": 0.14, "inbatch_neg_score": 0.2993, "inbatch_pos_score": 0.8657, "learning_rate": 4.75e-05, "loss": 3.8923, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.889, "num_token_overlap": 14.5254, "num_token_query": 37.2916, "num_token_union": 65.447, "num_word_context": 202.4345, "num_word_doc": 49.8873, "num_word_query": 27.9134, "postclip_grad_norm": 1.0, "preclip_grad_norm": 428.3946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2957, "query_norm": 1.5594, "queue_k_norm": 1.4334, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2916, "sent_len_1": 66.889, "sent_len_max_0": 127.9862, "sent_len_max_1": 189.8237, "stdk": 0.0471, "stdq": 0.0431, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.9014, "doc_norm": 1.4343, "encoder_q-embeddings": 150.0993, "encoder_q-layer.0": 102.8846, "encoder_q-layer.1": 123.12, "encoder_q-layer.10": 96.0802, "encoder_q-layer.11": 269.1583, "encoder_q-layer.2": 132.562, "encoder_q-layer.3": 137.7945, "encoder_q-layer.4": 149.2814, "encoder_q-layer.5": 153.1562, "encoder_q-layer.6": 168.2379, "encoder_q-layer.7": 161.8585, "encoder_q-layer.8": 151.5898, "encoder_q-layer.9": 99.3724, "epoch": 0.14, "inbatch_neg_score": 0.3162, "inbatch_pos_score": 0.8965, "learning_rate": 4.7444444444444445e-05, "loss": 3.9014, "norm_diff": 0.1378, "norm_loss": 0.0, "num_token_doc": 66.7327, "num_token_overlap": 14.5705, "num_token_query": 37.3965, "num_token_union": 65.4217, "num_word_context": 202.4194, "num_word_doc": 49.7876, "num_word_query": 27.9991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 225.6842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3142, "query_norm": 1.5721, "queue_k_norm": 1.4318, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3965, "sent_len_1": 66.7327, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2488, "stdk": 0.0473, "stdq": 0.0434, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.8756, "doc_norm": 1.4366, "encoder_q-embeddings": 213.0792, "encoder_q-layer.0": 160.006, "encoder_q-layer.1": 184.8333, "encoder_q-layer.10": 109.7916, "encoder_q-layer.11": 286.6121, "encoder_q-layer.2": 215.5701, "encoder_q-layer.3": 232.2227, "encoder_q-layer.4": 217.4747, "encoder_q-layer.5": 204.0279, "encoder_q-layer.6": 197.0143, "encoder_q-layer.7": 190.2835, "encoder_q-layer.8": 168.4443, "encoder_q-layer.9": 94.5333, "epoch": 0.14, "inbatch_neg_score": 0.3224, "inbatch_pos_score": 0.9092, "learning_rate": 4.7388888888888894e-05, "loss": 3.8756, "norm_diff": 0.1455, "norm_loss": 0.0, "num_token_doc": 66.8695, "num_token_overlap": 14.661, "num_token_query": 37.4284, "num_token_union": 65.4431, "num_word_context": 202.2604, "num_word_doc": 49.8793, "num_word_query": 28.0408, "postclip_grad_norm": 1.0, "preclip_grad_norm": 291.3618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3201, "query_norm": 1.5821, "queue_k_norm": 1.4337, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4284, "sent_len_1": 66.8695, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8837, "stdk": 0.0474, "stdq": 0.0435, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.9031, "doc_norm": 1.4313, "encoder_q-embeddings": 446.9283, "encoder_q-layer.0": 315.3466, "encoder_q-layer.1": 370.4336, "encoder_q-layer.10": 98.3771, "encoder_q-layer.11": 272.096, "encoder_q-layer.2": 404.6351, "encoder_q-layer.3": 450.287, "encoder_q-layer.4": 497.9726, "encoder_q-layer.5": 448.4268, "encoder_q-layer.6": 473.5137, "encoder_q-layer.7": 363.9238, "encoder_q-layer.8": 295.1795, "encoder_q-layer.9": 98.1429, "epoch": 0.14, "inbatch_neg_score": 0.333, "inbatch_pos_score": 0.9199, "learning_rate": 4.7333333333333336e-05, "loss": 3.9031, "norm_diff": 0.2069, "norm_loss": 0.0, "num_token_doc": 66.8123, "num_token_overlap": 14.7008, "num_token_query": 37.5456, "num_token_union": 65.3862, "num_word_context": 202.2033, "num_word_doc": 49.8585, "num_word_query": 28.1433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 557.5081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3286, "query_norm": 1.6382, "queue_k_norm": 1.4308, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5456, "sent_len_1": 66.8123, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9712, "stdk": 0.0471, "stdq": 0.0436, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.9083, "doc_norm": 1.4359, "encoder_q-embeddings": 110.7174, "encoder_q-layer.0": 75.765, "encoder_q-layer.1": 81.5026, "encoder_q-layer.10": 177.5743, "encoder_q-layer.11": 307.3953, "encoder_q-layer.2": 90.5618, "encoder_q-layer.3": 97.2507, "encoder_q-layer.4": 106.4557, "encoder_q-layer.5": 106.1047, "encoder_q-layer.6": 110.2903, "encoder_q-layer.7": 107.1928, "encoder_q-layer.8": 113.6651, "encoder_q-layer.9": 95.789, "epoch": 0.15, "inbatch_neg_score": 0.3242, "inbatch_pos_score": 0.9194, "learning_rate": 4.727777777777778e-05, "loss": 3.9083, "norm_diff": 0.1383, "norm_loss": 0.0, "num_token_doc": 66.5985, "num_token_overlap": 14.5725, "num_token_query": 37.3074, "num_token_union": 65.2363, "num_word_context": 201.9584, "num_word_doc": 49.6659, "num_word_query": 27.9505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 200.777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.321, "query_norm": 1.5742, "queue_k_norm": 1.4265, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3074, "sent_len_1": 66.5985, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.605, "stdk": 0.0473, "stdq": 0.0442, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.9125, "doc_norm": 1.4287, "encoder_q-embeddings": 948.0167, "encoder_q-layer.0": 754.6155, "encoder_q-layer.1": 806.9476, "encoder_q-layer.10": 102.5674, "encoder_q-layer.11": 244.4049, "encoder_q-layer.2": 918.9608, "encoder_q-layer.3": 923.75, "encoder_q-layer.4": 744.1813, "encoder_q-layer.5": 655.0289, "encoder_q-layer.6": 719.5796, "encoder_q-layer.7": 744.5883, "encoder_q-layer.8": 632.3776, "encoder_q-layer.9": 149.8463, "epoch": 0.15, "inbatch_neg_score": 0.3524, "inbatch_pos_score": 0.9155, "learning_rate": 4.722222222222222e-05, "loss": 3.9125, "norm_diff": 0.2238, "norm_loss": 0.0, "num_token_doc": 66.6061, "num_token_overlap": 14.5668, "num_token_query": 37.3136, "num_token_union": 65.2962, "num_word_context": 201.8475, "num_word_doc": 49.7101, "num_word_query": 27.9405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1070.7356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3481, "query_norm": 1.6525, "queue_k_norm": 1.4287, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3136, "sent_len_1": 66.6061, "sent_len_max_0": 128.0, "sent_len_max_1": 187.955, "stdk": 0.047, "stdq": 0.0433, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.906, "doc_norm": 1.4315, "encoder_q-embeddings": 192.1635, "encoder_q-layer.0": 139.9297, "encoder_q-layer.1": 164.7362, "encoder_q-layer.10": 96.5678, "encoder_q-layer.11": 254.8672, "encoder_q-layer.2": 204.6428, "encoder_q-layer.3": 216.9649, "encoder_q-layer.4": 237.6482, "encoder_q-layer.5": 227.3838, "encoder_q-layer.6": 214.4404, "encoder_q-layer.7": 239.6831, "encoder_q-layer.8": 210.4579, "encoder_q-layer.9": 103.5192, "epoch": 0.15, "inbatch_neg_score": 0.3572, "inbatch_pos_score": 0.9434, "learning_rate": 4.716666666666667e-05, "loss": 3.906, "norm_diff": 0.2043, "norm_loss": 0.0, "num_token_doc": 66.9583, "num_token_overlap": 14.5808, "num_token_query": 37.3011, "num_token_union": 65.4686, "num_word_context": 202.4596, "num_word_doc": 49.9557, "num_word_query": 27.9212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 292.7291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3547, "query_norm": 1.6358, "queue_k_norm": 1.4288, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3011, "sent_len_1": 66.9583, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.5712, "stdk": 0.047, "stdq": 0.0445, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.9108, "doc_norm": 1.4281, "encoder_q-embeddings": 3293.3877, "encoder_q-layer.0": 2522.2744, "encoder_q-layer.1": 3312.322, "encoder_q-layer.10": 144.3678, "encoder_q-layer.11": 289.1708, "encoder_q-layer.2": 3841.9958, "encoder_q-layer.3": 4361.6025, "encoder_q-layer.4": 5525.0742, "encoder_q-layer.5": 5564.7227, "encoder_q-layer.6": 4738.5596, "encoder_q-layer.7": 5850.9683, "encoder_q-layer.8": 4141.9951, "encoder_q-layer.9": 619.8772, "epoch": 0.15, "inbatch_neg_score": 0.3448, "inbatch_pos_score": 0.9448, "learning_rate": 4.711111111111111e-05, "loss": 3.9108, "norm_diff": 0.1633, "norm_loss": 0.0, "num_token_doc": 66.8167, "num_token_overlap": 14.4986, "num_token_query": 37.0977, "num_token_union": 65.3503, "num_word_context": 202.2352, "num_word_doc": 49.8472, "num_word_query": 27.7698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5783.786, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3447, "query_norm": 1.5915, "queue_k_norm": 1.4307, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.0977, "sent_len_1": 66.8167, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1438, "stdk": 0.0469, "stdq": 0.0454, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.921, "doc_norm": 1.4246, "encoder_q-embeddings": 246.5596, "encoder_q-layer.0": 165.3413, "encoder_q-layer.1": 182.8254, "encoder_q-layer.10": 121.7002, "encoder_q-layer.11": 303.2123, "encoder_q-layer.2": 195.7623, "encoder_q-layer.3": 214.2569, "encoder_q-layer.4": 216.8824, "encoder_q-layer.5": 219.3552, "encoder_q-layer.6": 252.5988, "encoder_q-layer.7": 229.6816, "encoder_q-layer.8": 178.9478, "encoder_q-layer.9": 103.7017, "epoch": 0.15, "inbatch_neg_score": 0.34, "inbatch_pos_score": 0.9141, "learning_rate": 4.7055555555555555e-05, "loss": 3.921, "norm_diff": 0.1364, "norm_loss": 0.0, "num_token_doc": 66.826, "num_token_overlap": 14.5591, "num_token_query": 37.0698, "num_token_union": 65.272, "num_word_context": 202.0481, "num_word_doc": 49.8196, "num_word_query": 27.7244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 311.5709, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3362, "query_norm": 1.561, "queue_k_norm": 1.4294, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.0698, "sent_len_1": 66.826, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1612, "stdk": 0.0467, "stdq": 0.0449, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.9165, "doc_norm": 1.4311, "encoder_q-embeddings": 372.3647, "encoder_q-layer.0": 289.1353, "encoder_q-layer.1": 297.5882, "encoder_q-layer.10": 109.2931, "encoder_q-layer.11": 294.853, "encoder_q-layer.2": 343.4954, "encoder_q-layer.3": 323.2743, "encoder_q-layer.4": 339.0417, "encoder_q-layer.5": 377.9512, "encoder_q-layer.6": 361.3146, "encoder_q-layer.7": 448.6587, "encoder_q-layer.8": 291.5641, "encoder_q-layer.9": 129.3074, "epoch": 0.15, "inbatch_neg_score": 0.3236, "inbatch_pos_score": 0.9155, "learning_rate": 4.7e-05, "loss": 3.9165, "norm_diff": 0.0945, "norm_loss": 0.0, "num_token_doc": 66.7955, "num_token_overlap": 14.5108, "num_token_query": 37.0733, "num_token_union": 65.2503, "num_word_context": 202.3793, "num_word_doc": 49.8484, "num_word_query": 27.7517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 476.5891, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3213, "query_norm": 1.5256, "queue_k_norm": 1.4306, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0733, "sent_len_1": 66.7955, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2713, "stdk": 0.0469, "stdq": 0.044, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9101, "doc_norm": 1.4356, "encoder_q-embeddings": 322.0668, "encoder_q-layer.0": 220.856, "encoder_q-layer.1": 289.3929, "encoder_q-layer.10": 99.2275, "encoder_q-layer.11": 282.1333, "encoder_q-layer.2": 335.7833, "encoder_q-layer.3": 326.0231, "encoder_q-layer.4": 312.7284, "encoder_q-layer.5": 286.9356, "encoder_q-layer.6": 277.5174, "encoder_q-layer.7": 282.3162, "encoder_q-layer.8": 195.2212, "encoder_q-layer.9": 111.358, "epoch": 0.15, "inbatch_neg_score": 0.3371, "inbatch_pos_score": 0.9214, "learning_rate": 4.6944444444444446e-05, "loss": 3.9101, "norm_diff": 0.088, "norm_loss": 0.0, "num_token_doc": 66.6528, "num_token_overlap": 14.6587, "num_token_query": 37.5656, "num_token_union": 65.3934, "num_word_context": 202.0775, "num_word_doc": 49.7446, "num_word_query": 28.1406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 405.7255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3362, "query_norm": 1.5235, "queue_k_norm": 1.4331, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5656, "sent_len_1": 66.6528, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3963, "stdk": 0.047, "stdq": 0.0438, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.917, "doc_norm": 1.436, "encoder_q-embeddings": 451.5254, "encoder_q-layer.0": 356.18, "encoder_q-layer.1": 372.0836, "encoder_q-layer.10": 101.5748, "encoder_q-layer.11": 299.1336, "encoder_q-layer.2": 415.1806, "encoder_q-layer.3": 412.2275, "encoder_q-layer.4": 350.2488, "encoder_q-layer.5": 261.3458, "encoder_q-layer.6": 246.8645, "encoder_q-layer.7": 240.9728, "encoder_q-layer.8": 213.7966, "encoder_q-layer.9": 116.1891, "epoch": 0.15, "inbatch_neg_score": 0.3259, "inbatch_pos_score": 0.8809, "learning_rate": 4.6888888888888895e-05, "loss": 3.917, "norm_diff": 0.034, "norm_loss": 0.0, "num_token_doc": 66.6137, "num_token_overlap": 14.4923, "num_token_query": 37.152, "num_token_union": 65.3148, "num_word_context": 202.1295, "num_word_doc": 49.7354, "num_word_query": 27.8096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 491.4314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3237, "query_norm": 1.47, "queue_k_norm": 1.4341, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.152, "sent_len_1": 66.6137, "sent_len_max_0": 128.0, "sent_len_max_1": 186.2925, "stdk": 0.047, "stdq": 0.0427, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.9, "doc_norm": 1.4365, "encoder_q-embeddings": 236.55, "encoder_q-layer.0": 172.0571, "encoder_q-layer.1": 161.4911, "encoder_q-layer.10": 151.3355, "encoder_q-layer.11": 344.3575, "encoder_q-layer.2": 182.8633, "encoder_q-layer.3": 206.3649, "encoder_q-layer.4": 242.5868, "encoder_q-layer.5": 218.1988, "encoder_q-layer.6": 239.6985, "encoder_q-layer.7": 237.8384, "encoder_q-layer.8": 235.3837, "encoder_q-layer.9": 147.3866, "epoch": 0.15, "inbatch_neg_score": 0.3119, "inbatch_pos_score": 0.9346, "learning_rate": 4.683333333333334e-05, "loss": 3.9, "norm_diff": 0.1256, "norm_loss": 0.0, "num_token_doc": 66.7532, "num_token_overlap": 14.5907, "num_token_query": 37.2754, "num_token_union": 65.3462, "num_word_context": 202.4372, "num_word_doc": 49.7925, "num_word_query": 27.8893, "postclip_grad_norm": 1.0, "preclip_grad_norm": 323.196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.311, "query_norm": 1.5621, "queue_k_norm": 1.4341, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2754, "sent_len_1": 66.7532, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2488, "stdk": 0.047, "stdq": 0.0462, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.8955, "doc_norm": 1.4374, "encoder_q-embeddings": 593.2172, "encoder_q-layer.0": 407.3373, "encoder_q-layer.1": 467.8614, "encoder_q-layer.10": 104.6638, "encoder_q-layer.11": 303.2636, "encoder_q-layer.2": 550.2637, "encoder_q-layer.3": 606.7838, "encoder_q-layer.4": 733.884, "encoder_q-layer.5": 594.1827, "encoder_q-layer.6": 663.4669, "encoder_q-layer.7": 572.9405, "encoder_q-layer.8": 449.3645, "encoder_q-layer.9": 140.6178, "epoch": 0.15, "inbatch_neg_score": 0.3287, "inbatch_pos_score": 0.9233, "learning_rate": 4.677777777777778e-05, "loss": 3.8955, "norm_diff": 0.0793, "norm_loss": 0.0, "num_token_doc": 66.8847, "num_token_overlap": 14.6344, "num_token_query": 37.5448, "num_token_union": 65.528, "num_word_context": 202.7516, "num_word_doc": 49.9298, "num_word_query": 28.0954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 763.133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3276, "query_norm": 1.5166, "queue_k_norm": 1.4347, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5448, "sent_len_1": 66.8847, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0938, "stdk": 0.0471, "stdq": 0.0447, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.8814, "doc_norm": 1.4352, "encoder_q-embeddings": 485.5845, "encoder_q-layer.0": 371.7324, "encoder_q-layer.1": 359.0024, "encoder_q-layer.10": 94.525, "encoder_q-layer.11": 275.1124, "encoder_q-layer.2": 387.5103, "encoder_q-layer.3": 347.8614, "encoder_q-layer.4": 330.1089, "encoder_q-layer.5": 290.2844, "encoder_q-layer.6": 232.4964, "encoder_q-layer.7": 222.4186, "encoder_q-layer.8": 161.8366, "encoder_q-layer.9": 84.7927, "epoch": 0.16, "inbatch_neg_score": 0.2928, "inbatch_pos_score": 0.8901, "learning_rate": 4.672222222222222e-05, "loss": 3.8814, "norm_diff": 0.0364, "norm_loss": 0.0, "num_token_doc": 66.9904, "num_token_overlap": 14.6194, "num_token_query": 37.4214, "num_token_union": 65.5659, "num_word_context": 202.6325, "num_word_doc": 49.956, "num_word_query": 28.0017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 481.1355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.291, "query_norm": 1.4716, "queue_k_norm": 1.435, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4214, "sent_len_1": 66.9904, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9212, "stdk": 0.047, "stdq": 0.0435, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.8796, "doc_norm": 1.4304, "encoder_q-embeddings": 13230.498, "encoder_q-layer.0": 10258.0869, "encoder_q-layer.1": 12383.1152, "encoder_q-layer.10": 261.4801, "encoder_q-layer.11": 580.6661, "encoder_q-layer.2": 15875.0166, "encoder_q-layer.3": 17949.9648, "encoder_q-layer.4": 16920.8809, "encoder_q-layer.5": 16380.5508, "encoder_q-layer.6": 13622.7275, "encoder_q-layer.7": 9712.4297, "encoder_q-layer.8": 5616.6143, "encoder_q-layer.9": 990.7466, "epoch": 0.16, "inbatch_neg_score": 0.3062, "inbatch_pos_score": 0.8789, "learning_rate": 4.666666666666667e-05, "loss": 3.8796, "norm_diff": 0.0504, "norm_loss": 0.0, "num_token_doc": 66.7908, "num_token_overlap": 14.6265, "num_token_query": 37.3376, "num_token_union": 65.3558, "num_word_context": 202.29, "num_word_doc": 49.8019, "num_word_query": 27.9483, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17979.4655, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3047, "query_norm": 1.4808, "queue_k_norm": 1.4338, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3376, "sent_len_1": 66.7908, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.3313, "stdk": 0.0469, "stdq": 0.0429, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.86, "doc_norm": 1.4254, "encoder_q-embeddings": 549.9355, "encoder_q-layer.0": 383.6872, "encoder_q-layer.1": 447.3526, "encoder_q-layer.10": 195.2887, "encoder_q-layer.11": 519.9769, "encoder_q-layer.2": 516.0514, "encoder_q-layer.3": 564.8769, "encoder_q-layer.4": 606.5592, "encoder_q-layer.5": 439.4635, "encoder_q-layer.6": 370.5915, "encoder_q-layer.7": 317.9612, "encoder_q-layer.8": 259.927, "encoder_q-layer.9": 161.7173, "epoch": 0.16, "inbatch_neg_score": 0.2908, "inbatch_pos_score": 0.8638, "learning_rate": 4.6611111111111114e-05, "loss": 3.86, "norm_diff": 0.0616, "norm_loss": 0.0, "num_token_doc": 66.7838, "num_token_overlap": 14.5928, "num_token_query": 37.3729, "num_token_union": 65.4242, "num_word_context": 202.3484, "num_word_doc": 49.8631, "num_word_query": 27.9762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 661.3017, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.291, "query_norm": 1.487, "queue_k_norm": 1.4297, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3729, "sent_len_1": 66.7838, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3438, "stdk": 0.0468, "stdq": 0.0433, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8575, "doc_norm": 1.4238, "encoder_q-embeddings": 978.7664, "encoder_q-layer.0": 820.0262, "encoder_q-layer.1": 964.2199, "encoder_q-layer.10": 193.4753, "encoder_q-layer.11": 548.02, "encoder_q-layer.2": 1069.1121, "encoder_q-layer.3": 971.3118, "encoder_q-layer.4": 828.1387, "encoder_q-layer.5": 854.5012, "encoder_q-layer.6": 832.7293, "encoder_q-layer.7": 756.0288, "encoder_q-layer.8": 497.0495, "encoder_q-layer.9": 194.6547, "epoch": 0.16, "inbatch_neg_score": 0.2509, "inbatch_pos_score": 0.8213, "learning_rate": 4.6555555555555556e-05, "loss": 3.8575, "norm_diff": 0.0659, "norm_loss": 0.0, "num_token_doc": 66.6544, "num_token_overlap": 14.6065, "num_token_query": 37.4521, "num_token_union": 65.3694, "num_word_context": 201.9432, "num_word_doc": 49.759, "num_word_query": 28.0547, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1207.8512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2485, "query_norm": 1.4898, "queue_k_norm": 1.4264, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4521, "sent_len_1": 66.6544, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1675, "stdk": 0.0468, "stdq": 0.0443, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8582, "doc_norm": 1.4239, "encoder_q-embeddings": 579.9049, "encoder_q-layer.0": 405.3686, "encoder_q-layer.1": 479.1088, "encoder_q-layer.10": 216.6164, "encoder_q-layer.11": 518.615, "encoder_q-layer.2": 595.9354, "encoder_q-layer.3": 613.7679, "encoder_q-layer.4": 712.5034, "encoder_q-layer.5": 739.5838, "encoder_q-layer.6": 651.6122, "encoder_q-layer.7": 559.8306, "encoder_q-layer.8": 361.7994, "encoder_q-layer.9": 195.9166, "epoch": 0.16, "inbatch_neg_score": 0.2306, "inbatch_pos_score": 0.8125, "learning_rate": 4.6500000000000005e-05, "loss": 3.8582, "norm_diff": 0.0727, "norm_loss": 0.0, "num_token_doc": 67.0822, "num_token_overlap": 14.7238, "num_token_query": 37.5908, "num_token_union": 65.5868, "num_word_context": 202.5013, "num_word_doc": 50.0266, "num_word_query": 28.1508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 789.6577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2283, "query_norm": 1.4966, "queue_k_norm": 1.422, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5908, "sent_len_1": 67.0822, "sent_len_max_0": 127.9963, "sent_len_max_1": 192.1687, "stdk": 0.0469, "stdq": 0.0444, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8661, "doc_norm": 1.416, "encoder_q-embeddings": 3273.9478, "encoder_q-layer.0": 2906.115, "encoder_q-layer.1": 2989.6338, "encoder_q-layer.10": 190.5345, "encoder_q-layer.11": 510.9085, "encoder_q-layer.2": 3119.251, "encoder_q-layer.3": 3160.0059, "encoder_q-layer.4": 2682.418, "encoder_q-layer.5": 2212.6125, "encoder_q-layer.6": 2071.6956, "encoder_q-layer.7": 1641.6456, "encoder_q-layer.8": 872.1649, "encoder_q-layer.9": 287.6038, "epoch": 0.16, "inbatch_neg_score": 0.2421, "inbatch_pos_score": 0.8149, "learning_rate": 4.644444444444445e-05, "loss": 3.8661, "norm_diff": 0.0851, "norm_loss": 0.0, "num_token_doc": 66.7465, "num_token_overlap": 14.5825, "num_token_query": 37.2492, "num_token_union": 65.3244, "num_word_context": 202.3909, "num_word_doc": 49.776, "num_word_query": 27.8768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3479.6401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2385, "query_norm": 1.5011, "queue_k_norm": 1.4177, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2492, "sent_len_1": 66.7465, "sent_len_max_0": 127.99, "sent_len_max_1": 189.4025, "stdk": 0.0468, "stdq": 0.0439, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8915, "doc_norm": 1.4071, "encoder_q-embeddings": 1614.5159, "encoder_q-layer.0": 1110.6309, "encoder_q-layer.1": 1133.5594, "encoder_q-layer.10": 218.9359, "encoder_q-layer.11": 582.6201, "encoder_q-layer.2": 1263.2562, "encoder_q-layer.3": 1326.3542, "encoder_q-layer.4": 1504.6761, "encoder_q-layer.5": 1394.6833, "encoder_q-layer.6": 1262.9628, "encoder_q-layer.7": 1004.4968, "encoder_q-layer.8": 560.8448, "encoder_q-layer.9": 225.7753, "epoch": 0.16, "inbatch_neg_score": 0.2544, "inbatch_pos_score": 0.8037, "learning_rate": 4.638888888888889e-05, "loss": 3.8915, "norm_diff": 0.0933, "norm_loss": 0.0, "num_token_doc": 66.5764, "num_token_overlap": 14.5601, "num_token_query": 37.2737, "num_token_union": 65.2726, "num_word_context": 201.8447, "num_word_doc": 49.6722, "num_word_query": 27.907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1692.9362, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2515, "query_norm": 1.5004, "queue_k_norm": 1.4123, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2737, "sent_len_1": 66.5764, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0888, "stdk": 0.0465, "stdq": 0.0432, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8748, "doc_norm": 1.4064, "encoder_q-embeddings": 519.3248, "encoder_q-layer.0": 357.9628, "encoder_q-layer.1": 371.5154, "encoder_q-layer.10": 184.2976, "encoder_q-layer.11": 543.2374, "encoder_q-layer.2": 403.4854, "encoder_q-layer.3": 410.3374, "encoder_q-layer.4": 437.0498, "encoder_q-layer.5": 441.9111, "encoder_q-layer.6": 447.7603, "encoder_q-layer.7": 454.6151, "encoder_q-layer.8": 307.1753, "encoder_q-layer.9": 169.6669, "epoch": 0.16, "inbatch_neg_score": 0.2609, "inbatch_pos_score": 0.8389, "learning_rate": 4.633333333333333e-05, "loss": 3.8748, "norm_diff": 0.1079, "norm_loss": 0.0, "num_token_doc": 66.7452, "num_token_overlap": 14.6438, "num_token_query": 37.5654, "num_token_union": 65.483, "num_word_context": 202.63, "num_word_doc": 49.8191, "num_word_query": 28.1664, "postclip_grad_norm": 1.0, "preclip_grad_norm": 620.9217, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2588, "query_norm": 1.5143, "queue_k_norm": 1.4107, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5654, "sent_len_1": 66.7452, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7125, "stdk": 0.0467, "stdq": 0.0436, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8813, "doc_norm": 1.4012, "encoder_q-embeddings": 1248.0353, "encoder_q-layer.0": 945.247, "encoder_q-layer.1": 989.1241, "encoder_q-layer.10": 184.8756, "encoder_q-layer.11": 486.1906, "encoder_q-layer.2": 1114.0099, "encoder_q-layer.3": 1150.4142, "encoder_q-layer.4": 853.1668, "encoder_q-layer.5": 611.74, "encoder_q-layer.6": 551.3544, "encoder_q-layer.7": 573.8747, "encoder_q-layer.8": 491.3047, "encoder_q-layer.9": 191.9439, "epoch": 0.16, "inbatch_neg_score": 0.2626, "inbatch_pos_score": 0.8569, "learning_rate": 4.627777777777778e-05, "loss": 3.8813, "norm_diff": 0.1021, "norm_loss": 0.0, "num_token_doc": 66.8052, "num_token_overlap": 14.65, "num_token_query": 37.5012, "num_token_union": 65.4969, "num_word_context": 201.9926, "num_word_doc": 49.8574, "num_word_query": 28.0946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1238.9195, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.5032, "queue_k_norm": 1.4077, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5012, "sent_len_1": 66.8052, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0987, "stdk": 0.0465, "stdq": 0.0441, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.8812, "doc_norm": 1.409, "encoder_q-embeddings": 236.8262, "encoder_q-layer.0": 166.1498, "encoder_q-layer.1": 183.7367, "encoder_q-layer.10": 192.5225, "encoder_q-layer.11": 504.9743, "encoder_q-layer.2": 204.9387, "encoder_q-layer.3": 217.2924, "encoder_q-layer.4": 221.5173, "encoder_q-layer.5": 215.6656, "encoder_q-layer.6": 216.6122, "encoder_q-layer.7": 216.9621, "encoder_q-layer.8": 212.7642, "encoder_q-layer.9": 170.7463, "epoch": 0.16, "inbatch_neg_score": 0.2622, "inbatch_pos_score": 0.8604, "learning_rate": 4.6222222222222224e-05, "loss": 3.8812, "norm_diff": 0.0861, "norm_loss": 0.0, "num_token_doc": 66.9465, "num_token_overlap": 14.5866, "num_token_query": 37.41, "num_token_union": 65.5093, "num_word_context": 202.4291, "num_word_doc": 49.9416, "num_word_query": 28.0186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 372.6819, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2625, "query_norm": 1.4951, "queue_k_norm": 1.4046, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.41, "sent_len_1": 66.9465, "sent_len_max_0": 128.0, "sent_len_max_1": 189.69, "stdk": 0.0469, "stdq": 0.0442, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8485, "doc_norm": 1.3996, "encoder_q-embeddings": 573.1556, "encoder_q-layer.0": 393.9337, "encoder_q-layer.1": 458.758, "encoder_q-layer.10": 186.4754, "encoder_q-layer.11": 501.8992, "encoder_q-layer.2": 520.4692, "encoder_q-layer.3": 540.822, "encoder_q-layer.4": 564.5187, "encoder_q-layer.5": 547.2355, "encoder_q-layer.6": 448.758, "encoder_q-layer.7": 366.4618, "encoder_q-layer.8": 282.0858, "encoder_q-layer.9": 171.8197, "epoch": 0.16, "inbatch_neg_score": 0.2649, "inbatch_pos_score": 0.8521, "learning_rate": 4.6166666666666666e-05, "loss": 3.8485, "norm_diff": 0.108, "norm_loss": 0.0, "num_token_doc": 66.8, "num_token_overlap": 14.6696, "num_token_query": 37.5416, "num_token_union": 65.4558, "num_word_context": 202.6184, "num_word_doc": 49.8364, "num_word_query": 28.1048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 683.4732, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2639, "query_norm": 1.5075, "queue_k_norm": 1.4027, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5416, "sent_len_1": 66.8, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6725, "stdk": 0.0466, "stdq": 0.0446, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8549, "doc_norm": 1.4019, "encoder_q-embeddings": 324.7046, "encoder_q-layer.0": 241.5096, "encoder_q-layer.1": 248.2584, "encoder_q-layer.10": 187.4162, "encoder_q-layer.11": 505.6439, "encoder_q-layer.2": 232.7957, "encoder_q-layer.3": 234.1063, "encoder_q-layer.4": 244.7095, "encoder_q-layer.5": 229.719, "encoder_q-layer.6": 247.5828, "encoder_q-layer.7": 222.019, "encoder_q-layer.8": 217.1555, "encoder_q-layer.9": 171.8898, "epoch": 0.17, "inbatch_neg_score": 0.2709, "inbatch_pos_score": 0.8721, "learning_rate": 4.6111111111111115e-05, "loss": 3.8549, "norm_diff": 0.1155, "norm_loss": 0.0, "num_token_doc": 66.6202, "num_token_overlap": 14.5946, "num_token_query": 37.2662, "num_token_union": 65.2129, "num_word_context": 202.0207, "num_word_doc": 49.7112, "num_word_query": 27.9005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 418.6498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2695, "query_norm": 1.5174, "queue_k_norm": 1.4019, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2662, "sent_len_1": 66.6202, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.5712, "stdk": 0.0467, "stdq": 0.0459, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.899, "doc_norm": 1.4028, "encoder_q-embeddings": 239.4483, "encoder_q-layer.0": 168.2031, "encoder_q-layer.1": 180.1696, "encoder_q-layer.10": 177.0459, "encoder_q-layer.11": 484.6062, "encoder_q-layer.2": 197.45, "encoder_q-layer.3": 210.1406, "encoder_q-layer.4": 204.0969, "encoder_q-layer.5": 198.5226, "encoder_q-layer.6": 198.7056, "encoder_q-layer.7": 178.9525, "encoder_q-layer.8": 187.4297, "encoder_q-layer.9": 154.1875, "epoch": 0.17, "inbatch_neg_score": 0.2699, "inbatch_pos_score": 0.8501, "learning_rate": 4.605555555555556e-05, "loss": 3.899, "norm_diff": 0.0346, "norm_loss": 0.0, "num_token_doc": 66.759, "num_token_overlap": 14.6116, "num_token_query": 37.3117, "num_token_union": 65.2927, "num_word_context": 202.1024, "num_word_doc": 49.7638, "num_word_query": 27.9177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 361.6065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2686, "query_norm": 1.4373, "queue_k_norm": 1.4007, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3117, "sent_len_1": 66.759, "sent_len_max_0": 128.0, "sent_len_max_1": 192.5813, "stdk": 0.0467, "stdq": 0.0435, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8705, "doc_norm": 1.4065, "encoder_q-embeddings": 4077.5496, "encoder_q-layer.0": 3143.4392, "encoder_q-layer.1": 3430.2327, "encoder_q-layer.10": 197.4209, "encoder_q-layer.11": 537.0544, "encoder_q-layer.2": 4026.4973, "encoder_q-layer.3": 3216.1626, "encoder_q-layer.4": 2908.9873, "encoder_q-layer.5": 2922.2036, "encoder_q-layer.6": 2332.4932, "encoder_q-layer.7": 1356.1586, "encoder_q-layer.8": 768.4878, "encoder_q-layer.9": 253.5284, "epoch": 0.17, "inbatch_neg_score": 0.2795, "inbatch_pos_score": 0.8672, "learning_rate": 4.600000000000001e-05, "loss": 3.8705, "norm_diff": 0.0634, "norm_loss": 0.0, "num_token_doc": 66.7774, "num_token_overlap": 14.5842, "num_token_query": 37.3237, "num_token_union": 65.357, "num_word_context": 202.3125, "num_word_doc": 49.8103, "num_word_query": 27.936, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4037.7001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2788, "query_norm": 1.4699, "queue_k_norm": 1.4018, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3237, "sent_len_1": 66.7774, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.1538, "stdk": 0.0469, "stdq": 0.0446, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8843, "doc_norm": 1.4018, "encoder_q-embeddings": 2130.6184, "encoder_q-layer.0": 1507.2656, "encoder_q-layer.1": 1381.0867, "encoder_q-layer.10": 192.6136, "encoder_q-layer.11": 474.8625, "encoder_q-layer.2": 1608.5966, "encoder_q-layer.3": 1687.8407, "encoder_q-layer.4": 1630.4257, "encoder_q-layer.5": 1467.4761, "encoder_q-layer.6": 1268.8173, "encoder_q-layer.7": 1291.3274, "encoder_q-layer.8": 630.7064, "encoder_q-layer.9": 204.2016, "epoch": 0.17, "inbatch_neg_score": 0.278, "inbatch_pos_score": 0.8579, "learning_rate": 4.594444444444444e-05, "loss": 3.8843, "norm_diff": 0.0514, "norm_loss": 0.0, "num_token_doc": 66.7515, "num_token_overlap": 14.5959, "num_token_query": 37.2583, "num_token_union": 65.323, "num_word_context": 202.3592, "num_word_doc": 49.8213, "num_word_query": 27.9115, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2081.7378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2751, "query_norm": 1.4531, "queue_k_norm": 1.4015, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2583, "sent_len_1": 66.7515, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.0513, "stdk": 0.0467, "stdq": 0.0443, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8592, "doc_norm": 1.4019, "encoder_q-embeddings": 952.7493, "encoder_q-layer.0": 739.0386, "encoder_q-layer.1": 804.0008, "encoder_q-layer.10": 204.5284, "encoder_q-layer.11": 578.7832, "encoder_q-layer.2": 884.231, "encoder_q-layer.3": 874.3622, "encoder_q-layer.4": 900.3239, "encoder_q-layer.5": 769.9785, "encoder_q-layer.6": 671.3276, "encoder_q-layer.7": 525.6013, "encoder_q-layer.8": 343.114, "encoder_q-layer.9": 197.4204, "epoch": 0.17, "inbatch_neg_score": 0.2931, "inbatch_pos_score": 0.8745, "learning_rate": 4.588888888888889e-05, "loss": 3.8592, "norm_diff": 0.0674, "norm_loss": 0.0, "num_token_doc": 66.839, "num_token_overlap": 14.642, "num_token_query": 37.3915, "num_token_union": 65.3872, "num_word_context": 202.3774, "num_word_doc": 49.8982, "num_word_query": 28.0051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1062.4118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2905, "query_norm": 1.4693, "queue_k_norm": 1.4009, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3915, "sent_len_1": 66.839, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.0337, "stdk": 0.0467, "stdq": 0.0451, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8902, "doc_norm": 1.4006, "encoder_q-embeddings": 739.1095, "encoder_q-layer.0": 509.0883, "encoder_q-layer.1": 538.1323, "encoder_q-layer.10": 192.5695, "encoder_q-layer.11": 530.6831, "encoder_q-layer.2": 562.4892, "encoder_q-layer.3": 578.5336, "encoder_q-layer.4": 489.4987, "encoder_q-layer.5": 452.562, "encoder_q-layer.6": 392.907, "encoder_q-layer.7": 337.8378, "encoder_q-layer.8": 255.5124, "encoder_q-layer.9": 188.6309, "epoch": 0.17, "inbatch_neg_score": 0.291, "inbatch_pos_score": 0.8608, "learning_rate": 4.5833333333333334e-05, "loss": 3.8902, "norm_diff": 0.0212, "norm_loss": 0.0, "num_token_doc": 66.9012, "num_token_overlap": 14.5661, "num_token_query": 37.3582, "num_token_union": 65.4814, "num_word_context": 202.4301, "num_word_doc": 49.8977, "num_word_query": 27.9598, "postclip_grad_norm": 1.0, "preclip_grad_norm": 736.893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2905, "query_norm": 1.4206, "queue_k_norm": 1.4028, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3582, "sent_len_1": 66.9012, "sent_len_max_0": 128.0, "sent_len_max_1": 190.425, "stdk": 0.0467, "stdq": 0.0434, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.8962, "doc_norm": 1.4036, "encoder_q-embeddings": 4140.8311, "encoder_q-layer.0": 3770.1279, "encoder_q-layer.1": 4043.0632, "encoder_q-layer.10": 192.4042, "encoder_q-layer.11": 510.8497, "encoder_q-layer.2": 4932.1108, "encoder_q-layer.3": 4906.4668, "encoder_q-layer.4": 4741.2485, "encoder_q-layer.5": 4518.6187, "encoder_q-layer.6": 3049.439, "encoder_q-layer.7": 2017.5388, "encoder_q-layer.8": 1057.7953, "encoder_q-layer.9": 346.9813, "epoch": 0.17, "inbatch_neg_score": 0.2823, "inbatch_pos_score": 0.8755, "learning_rate": 4.577777777777778e-05, "loss": 3.8962, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.6085, "num_token_overlap": 14.6066, "num_token_query": 37.4107, "num_token_union": 65.3224, "num_word_context": 202.265, "num_word_doc": 49.7261, "num_word_query": 28.0043, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5185.6993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2805, "query_norm": 1.4611, "queue_k_norm": 1.4013, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4107, "sent_len_1": 66.6085, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.375, "stdk": 0.0468, "stdq": 0.0444, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.907, "doc_norm": 1.4015, "encoder_q-embeddings": 491.9725, "encoder_q-layer.0": 344.0883, "encoder_q-layer.1": 368.8487, "encoder_q-layer.10": 238.7576, "encoder_q-layer.11": 562.4804, "encoder_q-layer.2": 396.6494, "encoder_q-layer.3": 410.8808, "encoder_q-layer.4": 417.882, "encoder_q-layer.5": 387.0194, "encoder_q-layer.6": 308.8958, "encoder_q-layer.7": 254.6266, "encoder_q-layer.8": 247.8181, "encoder_q-layer.9": 188.8274, "epoch": 0.17, "inbatch_neg_score": 0.2677, "inbatch_pos_score": 0.8604, "learning_rate": 4.572222222222222e-05, "loss": 3.907, "norm_diff": 0.0528, "norm_loss": 0.0, "num_token_doc": 66.8209, "num_token_overlap": 14.5475, "num_token_query": 37.3335, "num_token_union": 65.4662, "num_word_context": 202.5711, "num_word_doc": 49.8838, "num_word_query": 27.9586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 562.2707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2644, "query_norm": 1.4543, "queue_k_norm": 1.4036, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3335, "sent_len_1": 66.8209, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.685, "stdk": 0.0467, "stdq": 0.0451, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.8738, "doc_norm": 1.4053, "encoder_q-embeddings": 868.0796, "encoder_q-layer.0": 599.9919, "encoder_q-layer.1": 677.216, "encoder_q-layer.10": 180.422, "encoder_q-layer.11": 504.8581, "encoder_q-layer.2": 816.6591, "encoder_q-layer.3": 783.6783, "encoder_q-layer.4": 806.9374, "encoder_q-layer.5": 792.235, "encoder_q-layer.6": 767.5429, "encoder_q-layer.7": 687.7762, "encoder_q-layer.8": 392.9479, "encoder_q-layer.9": 189.7601, "epoch": 0.17, "inbatch_neg_score": 0.2672, "inbatch_pos_score": 0.8496, "learning_rate": 4.566666666666667e-05, "loss": 3.8738, "norm_diff": 0.039, "norm_loss": 0.0, "num_token_doc": 67.2151, "num_token_overlap": 14.6738, "num_token_query": 37.5414, "num_token_union": 65.6852, "num_word_context": 203.0972, "num_word_doc": 50.1627, "num_word_query": 28.1164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 986.0014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2678, "query_norm": 1.4444, "queue_k_norm": 1.4032, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5414, "sent_len_1": 67.2151, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5462, "stdk": 0.0469, "stdq": 0.0444, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8916, "doc_norm": 1.4048, "encoder_q-embeddings": 657.4396, "encoder_q-layer.0": 467.8706, "encoder_q-layer.1": 475.7433, "encoder_q-layer.10": 192.3292, "encoder_q-layer.11": 560.231, "encoder_q-layer.2": 546.3126, "encoder_q-layer.3": 632.65, "encoder_q-layer.4": 528.8092, "encoder_q-layer.5": 493.7711, "encoder_q-layer.6": 393.3607, "encoder_q-layer.7": 330.3017, "encoder_q-layer.8": 273.0636, "encoder_q-layer.9": 184.6013, "epoch": 0.17, "inbatch_neg_score": 0.2699, "inbatch_pos_score": 0.855, "learning_rate": 4.561111111111112e-05, "loss": 3.8916, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.8043, "num_token_overlap": 14.5538, "num_token_query": 37.2347, "num_token_union": 65.3709, "num_word_context": 202.0657, "num_word_doc": 49.8255, "num_word_query": 27.871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 723.6084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2686, "query_norm": 1.4401, "queue_k_norm": 1.4029, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2347, "sent_len_1": 66.8043, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5925, "stdk": 0.0469, "stdq": 0.0438, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.8727, "doc_norm": 1.399, "encoder_q-embeddings": 12524.1367, "encoder_q-layer.0": 8900.9434, "encoder_q-layer.1": 10035.7266, "encoder_q-layer.10": 438.761, "encoder_q-layer.11": 1081.1219, "encoder_q-layer.2": 11526.1113, "encoder_q-layer.3": 11464.3799, "encoder_q-layer.4": 11547.2998, "encoder_q-layer.5": 12114.8896, "encoder_q-layer.6": 8529.8115, "encoder_q-layer.7": 5923.7402, "encoder_q-layer.8": 3458.2368, "encoder_q-layer.9": 631.7262, "epoch": 0.18, "inbatch_neg_score": 0.2645, "inbatch_pos_score": 0.833, "learning_rate": 4.555555555555556e-05, "loss": 3.8727, "norm_diff": 0.0109, "norm_loss": 0.0, "num_token_doc": 66.5198, "num_token_overlap": 14.6137, "num_token_query": 37.3755, "num_token_union": 65.2344, "num_word_context": 202.0683, "num_word_doc": 49.6096, "num_word_query": 27.9831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13527.8975, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2646, "query_norm": 1.4018, "queue_k_norm": 1.4025, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3755, "sent_len_1": 66.5198, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.145, "stdk": 0.0467, "stdq": 0.0428, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.9269, "doc_norm": 1.4052, "encoder_q-embeddings": 2037.1527, "encoder_q-layer.0": 1406.6914, "encoder_q-layer.1": 1642.6384, "encoder_q-layer.10": 354.5718, "encoder_q-layer.11": 921.778, "encoder_q-layer.2": 1925.5686, "encoder_q-layer.3": 1982.1981, "encoder_q-layer.4": 2143.9233, "encoder_q-layer.5": 2097.5361, "encoder_q-layer.6": 1436.1782, "encoder_q-layer.7": 939.008, "encoder_q-layer.8": 649.212, "encoder_q-layer.9": 368.8454, "epoch": 0.18, "inbatch_neg_score": 0.2327, "inbatch_pos_score": 0.7993, "learning_rate": 4.55e-05, "loss": 3.9269, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.7405, "num_token_overlap": 14.567, "num_token_query": 37.3017, "num_token_union": 65.3568, "num_word_context": 202.3063, "num_word_doc": 49.8079, "num_word_query": 27.9383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2287.5742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2325, "query_norm": 1.4118, "queue_k_norm": 1.4015, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3017, "sent_len_1": 66.7405, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.8625, "stdk": 0.047, "stdq": 0.0432, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.872, "doc_norm": 1.3988, "encoder_q-embeddings": 1338.9454, "encoder_q-layer.0": 1025.3062, "encoder_q-layer.1": 1076.1904, "encoder_q-layer.10": 355.8911, "encoder_q-layer.11": 956.2907, "encoder_q-layer.2": 1154.9043, "encoder_q-layer.3": 1123.4939, "encoder_q-layer.4": 1147.9371, "encoder_q-layer.5": 1137.5912, "encoder_q-layer.6": 889.8519, "encoder_q-layer.7": 771.9941, "encoder_q-layer.8": 535.7369, "encoder_q-layer.9": 328.0527, "epoch": 0.18, "inbatch_neg_score": 0.2539, "inbatch_pos_score": 0.8179, "learning_rate": 4.5444444444444444e-05, "loss": 3.872, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.8074, "num_token_overlap": 14.6118, "num_token_query": 37.4444, "num_token_union": 65.4507, "num_word_context": 202.2435, "num_word_doc": 49.8345, "num_word_query": 28.0561, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1470.4968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2527, "query_norm": 1.3972, "queue_k_norm": 1.4016, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4444, "sent_len_1": 66.8074, "sent_len_max_0": 127.9887, "sent_len_max_1": 191.2837, "stdk": 0.0467, "stdq": 0.043, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.886, "doc_norm": 1.4061, "encoder_q-embeddings": 1123.0525, "encoder_q-layer.0": 803.3347, "encoder_q-layer.1": 799.8953, "encoder_q-layer.10": 374.2834, "encoder_q-layer.11": 1019.4436, "encoder_q-layer.2": 866.0559, "encoder_q-layer.3": 763.5327, "encoder_q-layer.4": 730.2485, "encoder_q-layer.5": 709.7676, "encoder_q-layer.6": 675.801, "encoder_q-layer.7": 613.8483, "encoder_q-layer.8": 499.9727, "encoder_q-layer.9": 373.3752, "epoch": 0.18, "inbatch_neg_score": 0.2593, "inbatch_pos_score": 0.8115, "learning_rate": 4.538888888888889e-05, "loss": 3.886, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 67.0549, "num_token_overlap": 14.5183, "num_token_query": 37.4438, "num_token_union": 65.617, "num_word_context": 202.4185, "num_word_doc": 49.9955, "num_word_query": 28.0373, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1167.6633, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2576, "query_norm": 1.3859, "queue_k_norm": 1.4007, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4438, "sent_len_1": 67.0549, "sent_len_max_0": 128.0, "sent_len_max_1": 189.58, "stdk": 0.047, "stdq": 0.0423, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.8433, "doc_norm": 1.3991, "encoder_q-embeddings": 2183.3, "encoder_q-layer.0": 1502.6198, "encoder_q-layer.1": 1818.7986, "encoder_q-layer.10": 356.6124, "encoder_q-layer.11": 1017.0446, "encoder_q-layer.2": 1993.5507, "encoder_q-layer.3": 2334.9133, "encoder_q-layer.4": 2343.5747, "encoder_q-layer.5": 1954.5946, "encoder_q-layer.6": 1798.6176, "encoder_q-layer.7": 1273.3802, "encoder_q-layer.8": 898.8586, "encoder_q-layer.9": 454.6095, "epoch": 0.18, "inbatch_neg_score": 0.2394, "inbatch_pos_score": 0.7998, "learning_rate": 4.5333333333333335e-05, "loss": 3.8433, "norm_diff": 0.013, "norm_loss": 0.0, "num_token_doc": 67.0069, "num_token_overlap": 14.6329, "num_token_query": 37.4729, "num_token_union": 65.6123, "num_word_context": 202.5942, "num_word_doc": 50.0305, "num_word_query": 28.0472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2520.0759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2382, "query_norm": 1.3929, "queue_k_norm": 1.4019, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4729, "sent_len_1": 67.0069, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.625, "stdk": 0.0468, "stdq": 0.0428, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.8558, "doc_norm": 1.4019, "encoder_q-embeddings": 1673.8141, "encoder_q-layer.0": 1127.2798, "encoder_q-layer.1": 1397.2303, "encoder_q-layer.10": 363.7082, "encoder_q-layer.11": 1070.8627, "encoder_q-layer.2": 1426.5214, "encoder_q-layer.3": 1642.6099, "encoder_q-layer.4": 1768.767, "encoder_q-layer.5": 1492.4233, "encoder_q-layer.6": 1457.8009, "encoder_q-layer.7": 1475.4742, "encoder_q-layer.8": 1058.9811, "encoder_q-layer.9": 458.5441, "epoch": 0.18, "inbatch_neg_score": 0.2509, "inbatch_pos_score": 0.8389, "learning_rate": 4.527777777777778e-05, "loss": 3.8558, "norm_diff": 0.0317, "norm_loss": 0.0, "num_token_doc": 66.7311, "num_token_overlap": 14.5744, "num_token_query": 37.2719, "num_token_union": 65.3465, "num_word_context": 202.4089, "num_word_doc": 49.7979, "num_word_query": 27.907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2008.2597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2502, "query_norm": 1.4337, "queue_k_norm": 1.4001, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2719, "sent_len_1": 66.7311, "sent_len_max_0": 128.0, "sent_len_max_1": 188.625, "stdk": 0.0469, "stdq": 0.0438, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8757, "doc_norm": 1.4025, "encoder_q-embeddings": 688.9067, "encoder_q-layer.0": 510.5059, "encoder_q-layer.1": 554.5079, "encoder_q-layer.10": 357.6509, "encoder_q-layer.11": 1007.9579, "encoder_q-layer.2": 511.9358, "encoder_q-layer.3": 484.9735, "encoder_q-layer.4": 478.2708, "encoder_q-layer.5": 455.0928, "encoder_q-layer.6": 406.1815, "encoder_q-layer.7": 367.2512, "encoder_q-layer.8": 401.9606, "encoder_q-layer.9": 318.0563, "epoch": 0.18, "inbatch_neg_score": 0.2428, "inbatch_pos_score": 0.8169, "learning_rate": 4.522222222222223e-05, "loss": 3.8757, "norm_diff": 0.0158, "norm_loss": 0.0, "num_token_doc": 66.7746, "num_token_overlap": 14.5552, "num_token_query": 37.2657, "num_token_union": 65.373, "num_word_context": 202.3138, "num_word_doc": 49.7524, "num_word_query": 27.8839, "postclip_grad_norm": 1.0, "preclip_grad_norm": 835.7984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2404, "query_norm": 1.4103, "queue_k_norm": 1.3998, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2657, "sent_len_1": 66.7746, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.9313, "stdk": 0.047, "stdq": 0.0435, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8989, "doc_norm": 1.388, "encoder_q-embeddings": 1439.1635, "encoder_q-layer.0": 973.9892, "encoder_q-layer.1": 934.6395, "encoder_q-layer.10": 350.1937, "encoder_q-layer.11": 976.5249, "encoder_q-layer.2": 984.35, "encoder_q-layer.3": 1003.1753, "encoder_q-layer.4": 1056.1609, "encoder_q-layer.5": 979.4615, "encoder_q-layer.6": 835.4103, "encoder_q-layer.7": 697.2384, "encoder_q-layer.8": 507.0732, "encoder_q-layer.9": 327.3307, "epoch": 0.18, "inbatch_neg_score": 0.2315, "inbatch_pos_score": 0.7896, "learning_rate": 4.516666666666667e-05, "loss": 3.8989, "norm_diff": 0.014, "norm_loss": 0.0, "num_token_doc": 66.6732, "num_token_overlap": 14.5569, "num_token_query": 37.4042, "num_token_union": 65.3937, "num_word_context": 202.4404, "num_word_doc": 49.7383, "num_word_query": 28.0286, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1423.0719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2311, "query_norm": 1.3791, "queue_k_norm": 1.3989, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4042, "sent_len_1": 66.6732, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9563, "stdk": 0.0465, "stdq": 0.0429, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.8604, "doc_norm": 1.4003, "encoder_q-embeddings": 2606.5122, "encoder_q-layer.0": 2035.8607, "encoder_q-layer.1": 2045.2029, "encoder_q-layer.10": 371.8781, "encoder_q-layer.11": 1049.0117, "encoder_q-layer.2": 2453.4045, "encoder_q-layer.3": 2352.969, "encoder_q-layer.4": 2458.1267, "encoder_q-layer.5": 2000.7461, "encoder_q-layer.6": 1792.4999, "encoder_q-layer.7": 1463.8943, "encoder_q-layer.8": 770.575, "encoder_q-layer.9": 400.2055, "epoch": 0.18, "inbatch_neg_score": 0.2356, "inbatch_pos_score": 0.8228, "learning_rate": 4.511111111111112e-05, "loss": 3.8604, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.7475, "num_token_overlap": 14.6362, "num_token_query": 37.3854, "num_token_union": 65.3997, "num_word_context": 202.3453, "num_word_doc": 49.8135, "num_word_query": 28.0353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2793.4273, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2351, "query_norm": 1.4146, "queue_k_norm": 1.3996, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3854, "sent_len_1": 66.7475, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9863, "stdk": 0.047, "stdq": 0.0438, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8634, "doc_norm": 1.3967, "encoder_q-embeddings": 7386.0762, "encoder_q-layer.0": 5548.1895, "encoder_q-layer.1": 6148.4614, "encoder_q-layer.10": 428.3735, "encoder_q-layer.11": 1014.2686, "encoder_q-layer.2": 7263.0317, "encoder_q-layer.3": 7339.9307, "encoder_q-layer.4": 8338.5459, "encoder_q-layer.5": 6352.8555, "encoder_q-layer.6": 4484.5073, "encoder_q-layer.7": 2851.5022, "encoder_q-layer.8": 1620.4885, "encoder_q-layer.9": 582.1348, "epoch": 0.18, "inbatch_neg_score": 0.243, "inbatch_pos_score": 0.8271, "learning_rate": 4.5055555555555554e-05, "loss": 3.8634, "norm_diff": 0.0105, "norm_loss": 0.0, "num_token_doc": 66.6352, "num_token_overlap": 14.5698, "num_token_query": 37.3892, "num_token_union": 65.3391, "num_word_context": 202.3674, "num_word_doc": 49.6905, "num_word_query": 27.9626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8057.7313, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2422, "query_norm": 1.402, "queue_k_norm": 1.396, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3892, "sent_len_1": 66.6352, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1675, "stdk": 0.0469, "stdq": 0.0437, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8826, "doc_norm": 1.3956, "encoder_q-embeddings": 5670.498, "encoder_q-layer.0": 4342.3667, "encoder_q-layer.1": 5039.4937, "encoder_q-layer.10": 418.4722, "encoder_q-layer.11": 1037.5161, "encoder_q-layer.2": 5354.0986, "encoder_q-layer.3": 5848.1641, "encoder_q-layer.4": 6181.5557, "encoder_q-layer.5": 5859.7939, "encoder_q-layer.6": 5393.7412, "encoder_q-layer.7": 4785.5186, "encoder_q-layer.8": 2423.4629, "encoder_q-layer.9": 650.099, "epoch": 0.19, "inbatch_neg_score": 0.2358, "inbatch_pos_score": 0.8086, "learning_rate": 4.5e-05, "loss": 3.8826, "norm_diff": 0.0101, "norm_loss": 0.0, "num_token_doc": 66.8726, "num_token_overlap": 14.5011, "num_token_query": 37.057, "num_token_union": 65.3456, "num_word_context": 202.3304, "num_word_doc": 49.8739, "num_word_query": 27.7332, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6819.1102, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2367, "query_norm": 1.392, "queue_k_norm": 1.3977, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.057, "sent_len_1": 66.8726, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5225, "stdk": 0.0469, "stdq": 0.0432, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.8739, "doc_norm": 1.4042, "encoder_q-embeddings": 2208.3638, "encoder_q-layer.0": 1575.4727, "encoder_q-layer.1": 1769.3895, "encoder_q-layer.10": 373.2646, "encoder_q-layer.11": 1082.4374, "encoder_q-layer.2": 2254.1113, "encoder_q-layer.3": 2728.7412, "encoder_q-layer.4": 3082.1655, "encoder_q-layer.5": 3343.689, "encoder_q-layer.6": 2475.3535, "encoder_q-layer.7": 2049.7297, "encoder_q-layer.8": 1118.9886, "encoder_q-layer.9": 504.4774, "epoch": 0.19, "inbatch_neg_score": 0.2449, "inbatch_pos_score": 0.8271, "learning_rate": 4.4944444444444445e-05, "loss": 3.8739, "norm_diff": 0.0196, "norm_loss": 0.0, "num_token_doc": 66.7845, "num_token_overlap": 14.5907, "num_token_query": 37.3844, "num_token_union": 65.402, "num_word_context": 202.6361, "num_word_doc": 49.8097, "num_word_query": 27.9915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3048.3461, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2441, "query_norm": 1.4199, "queue_k_norm": 1.3992, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3844, "sent_len_1": 66.7845, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8688, "stdk": 0.0472, "stdq": 0.0443, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.9039, "doc_norm": 1.397, "encoder_q-embeddings": 6520.126, "encoder_q-layer.0": 4844.2739, "encoder_q-layer.1": 4269.5352, "encoder_q-layer.10": 373.5397, "encoder_q-layer.11": 1040.0361, "encoder_q-layer.2": 4663.9419, "encoder_q-layer.3": 4975.3477, "encoder_q-layer.4": 5071.8564, "encoder_q-layer.5": 5238.8462, "encoder_q-layer.6": 6121.083, "encoder_q-layer.7": 5007.5571, "encoder_q-layer.8": 1701.6315, "encoder_q-layer.9": 516.1119, "epoch": 0.19, "inbatch_neg_score": 0.2606, "inbatch_pos_score": 0.8354, "learning_rate": 4.4888888888888894e-05, "loss": 3.9039, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 66.8592, "num_token_overlap": 14.623, "num_token_query": 37.4174, "num_token_union": 65.4563, "num_word_context": 202.1511, "num_word_doc": 49.8804, "num_word_query": 28.0051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6766.9529, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.261, "query_norm": 1.4003, "queue_k_norm": 1.3982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4174, "sent_len_1": 66.8592, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.33, "stdk": 0.047, "stdq": 0.0432, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8873, "doc_norm": 1.3963, "encoder_q-embeddings": 1673.4232, "encoder_q-layer.0": 1184.002, "encoder_q-layer.1": 1371.2499, "encoder_q-layer.10": 367.6439, "encoder_q-layer.11": 1020.3686, "encoder_q-layer.2": 1540.2354, "encoder_q-layer.3": 1641.3647, "encoder_q-layer.4": 1396.5139, "encoder_q-layer.5": 1310.3622, "encoder_q-layer.6": 1311.0221, "encoder_q-layer.7": 1087.8993, "encoder_q-layer.8": 790.5227, "encoder_q-layer.9": 366.214, "epoch": 0.19, "inbatch_neg_score": 0.2667, "inbatch_pos_score": 0.8184, "learning_rate": 4.483333333333333e-05, "loss": 3.8873, "norm_diff": 0.0123, "norm_loss": 0.0, "num_token_doc": 66.762, "num_token_overlap": 14.6215, "num_token_query": 37.3977, "num_token_union": 65.4226, "num_word_context": 202.4372, "num_word_doc": 49.8408, "num_word_query": 27.9949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1907.7146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.3894, "queue_k_norm": 1.3984, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3977, "sent_len_1": 66.762, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5225, "stdk": 0.0469, "stdq": 0.0427, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.8649, "doc_norm": 1.3956, "encoder_q-embeddings": 1230.0216, "encoder_q-layer.0": 952.9457, "encoder_q-layer.1": 1014.6591, "encoder_q-layer.10": 345.4135, "encoder_q-layer.11": 987.0247, "encoder_q-layer.2": 1049.3956, "encoder_q-layer.3": 1039.9246, "encoder_q-layer.4": 1134.2501, "encoder_q-layer.5": 1097.6066, "encoder_q-layer.6": 722.5807, "encoder_q-layer.7": 493.3924, "encoder_q-layer.8": 471.8635, "encoder_q-layer.9": 339.7781, "epoch": 0.19, "inbatch_neg_score": 0.2788, "inbatch_pos_score": 0.8574, "learning_rate": 4.477777777777778e-05, "loss": 3.8649, "norm_diff": 0.0455, "norm_loss": 0.0, "num_token_doc": 66.5263, "num_token_overlap": 14.511, "num_token_query": 37.2402, "num_token_union": 65.2254, "num_word_context": 201.9471, "num_word_doc": 49.6335, "num_word_query": 27.8814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1389.4563, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2771, "query_norm": 1.4411, "queue_k_norm": 1.3995, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2402, "sent_len_1": 66.5263, "sent_len_max_0": 127.995, "sent_len_max_1": 190.02, "stdk": 0.0468, "stdq": 0.0443, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.8562, "doc_norm": 1.3997, "encoder_q-embeddings": 2887.0667, "encoder_q-layer.0": 2001.6393, "encoder_q-layer.1": 2283.3337, "encoder_q-layer.10": 370.1569, "encoder_q-layer.11": 1012.6234, "encoder_q-layer.2": 2443.8845, "encoder_q-layer.3": 2394.229, "encoder_q-layer.4": 2724.4834, "encoder_q-layer.5": 2305.1018, "encoder_q-layer.6": 1783.7605, "encoder_q-layer.7": 1003.2383, "encoder_q-layer.8": 609.1725, "encoder_q-layer.9": 352.1462, "epoch": 0.19, "inbatch_neg_score": 0.2794, "inbatch_pos_score": 0.855, "learning_rate": 4.472222222222223e-05, "loss": 3.8562, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.7035, "num_token_overlap": 14.5667, "num_token_query": 37.3475, "num_token_union": 65.363, "num_word_context": 202.3327, "num_word_doc": 49.7631, "num_word_query": 27.956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2920.5721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2781, "query_norm": 1.4341, "queue_k_norm": 1.3983, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3475, "sent_len_1": 66.7035, "sent_len_max_0": 128.0, "sent_len_max_1": 190.13, "stdk": 0.047, "stdq": 0.0437, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8435, "doc_norm": 1.4034, "encoder_q-embeddings": 664.7312, "encoder_q-layer.0": 446.9622, "encoder_q-layer.1": 507.3156, "encoder_q-layer.10": 459.4748, "encoder_q-layer.11": 1079.7593, "encoder_q-layer.2": 590.0707, "encoder_q-layer.3": 644.96, "encoder_q-layer.4": 678.7877, "encoder_q-layer.5": 613.6631, "encoder_q-layer.6": 599.4079, "encoder_q-layer.7": 575.0724, "encoder_q-layer.8": 482.6959, "encoder_q-layer.9": 350.8269, "epoch": 0.19, "inbatch_neg_score": 0.2855, "inbatch_pos_score": 0.8955, "learning_rate": 4.466666666666667e-05, "loss": 3.8435, "norm_diff": 0.0859, "norm_loss": 0.0, "num_token_doc": 66.6502, "num_token_overlap": 14.579, "num_token_query": 37.2852, "num_token_union": 65.2962, "num_word_context": 202.3733, "num_word_doc": 49.7401, "num_word_query": 27.9141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.8951, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.4893, "queue_k_norm": 1.4028, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2852, "sent_len_1": 66.6502, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.4475, "stdk": 0.0471, "stdq": 0.0452, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.8426, "doc_norm": 1.4077, "encoder_q-embeddings": 721.2108, "encoder_q-layer.0": 535.6205, "encoder_q-layer.1": 558.6066, "encoder_q-layer.10": 347.6558, "encoder_q-layer.11": 999.3803, "encoder_q-layer.2": 626.283, "encoder_q-layer.3": 632.2157, "encoder_q-layer.4": 598.5659, "encoder_q-layer.5": 479.9753, "encoder_q-layer.6": 473.0219, "encoder_q-layer.7": 449.3781, "encoder_q-layer.8": 426.6123, "encoder_q-layer.9": 323.5942, "epoch": 0.19, "inbatch_neg_score": 0.2824, "inbatch_pos_score": 0.8633, "learning_rate": 4.461111111111111e-05, "loss": 3.8426, "norm_diff": 0.0389, "norm_loss": 0.0, "num_token_doc": 66.8496, "num_token_overlap": 14.6019, "num_token_query": 37.2134, "num_token_union": 65.3029, "num_word_context": 202.3581, "num_word_doc": 49.8813, "num_word_query": 27.8595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 903.198, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.4466, "queue_k_norm": 1.4022, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2134, "sent_len_1": 66.8496, "sent_len_max_0": 127.9825, "sent_len_max_1": 190.1387, "stdk": 0.0472, "stdq": 0.044, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.8528, "doc_norm": 1.4023, "encoder_q-embeddings": 1067.48, "encoder_q-layer.0": 760.3171, "encoder_q-layer.1": 850.1859, "encoder_q-layer.10": 347.9846, "encoder_q-layer.11": 1020.1155, "encoder_q-layer.2": 1114.7397, "encoder_q-layer.3": 1176.3433, "encoder_q-layer.4": 974.9984, "encoder_q-layer.5": 837.5908, "encoder_q-layer.6": 834.3282, "encoder_q-layer.7": 711.2582, "encoder_q-layer.8": 482.1261, "encoder_q-layer.9": 318.8717, "epoch": 0.19, "inbatch_neg_score": 0.2691, "inbatch_pos_score": 0.8584, "learning_rate": 4.4555555555555555e-05, "loss": 3.8528, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.6749, "num_token_overlap": 14.5513, "num_token_query": 37.2281, "num_token_union": 65.3566, "num_word_context": 202.1485, "num_word_doc": 49.7899, "num_word_query": 27.8955, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1280.6162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2683, "query_norm": 1.4123, "queue_k_norm": 1.4029, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2281, "sent_len_1": 66.6749, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4238, "stdk": 0.047, "stdq": 0.0434, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8588, "doc_norm": 1.4078, "encoder_q-embeddings": 1123.1147, "encoder_q-layer.0": 801.1213, "encoder_q-layer.1": 884.1791, "encoder_q-layer.10": 439.4359, "encoder_q-layer.11": 1108.7754, "encoder_q-layer.2": 981.9319, "encoder_q-layer.3": 976.1702, "encoder_q-layer.4": 997.9146, "encoder_q-layer.5": 897.7394, "encoder_q-layer.6": 934.1469, "encoder_q-layer.7": 811.7343, "encoder_q-layer.8": 555.5004, "encoder_q-layer.9": 361.1069, "epoch": 0.19, "inbatch_neg_score": 0.2765, "inbatch_pos_score": 0.8574, "learning_rate": 4.4500000000000004e-05, "loss": 3.8588, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.9496, "num_token_overlap": 14.6314, "num_token_query": 37.3829, "num_token_union": 65.5202, "num_word_context": 202.2541, "num_word_doc": 49.9615, "num_word_query": 27.9897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1328.7793, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2771, "query_norm": 1.4509, "queue_k_norm": 1.4036, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3829, "sent_len_1": 66.9496, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.2763, "stdk": 0.0472, "stdq": 0.0444, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9137, "doc_norm": 1.4032, "encoder_q-embeddings": 19743.1914, "encoder_q-layer.0": 14273.626, "encoder_q-layer.1": 13122.666, "encoder_q-layer.10": 822.8656, "encoder_q-layer.11": 2119.7629, "encoder_q-layer.2": 14250.2529, "encoder_q-layer.3": 14852.1377, "encoder_q-layer.4": 18987.7891, "encoder_q-layer.5": 14511.0117, "encoder_q-layer.6": 15371.1777, "encoder_q-layer.7": 14473.2998, "encoder_q-layer.8": 8444.2969, "encoder_q-layer.9": 2950.4539, "epoch": 0.2, "inbatch_neg_score": 0.2807, "inbatch_pos_score": 0.8652, "learning_rate": 4.4444444444444447e-05, "loss": 3.9137, "norm_diff": 0.0205, "norm_loss": 0.0, "num_token_doc": 66.6659, "num_token_overlap": 14.5496, "num_token_query": 37.0366, "num_token_union": 65.1587, "num_word_context": 202.0282, "num_word_doc": 49.7927, "num_word_query": 27.7083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20698.365, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2788, "query_norm": 1.4237, "queue_k_norm": 1.4014, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.0366, "sent_len_1": 66.6659, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.0762, "stdk": 0.047, "stdq": 0.0442, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 27.7905, "dev_samples_per_second": 2.303, "dev_steps_per_second": 0.036, "epoch": 0.2, "step": 20000, "test_accuracy": 91.69921875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5058528184890747, "test_doc_norm": 1.3665536642074585, "test_inbatch_neg_score": 0.5986124873161316, "test_inbatch_pos_score": 1.4486560821533203, "test_loss": 0.5058528184890747, "test_loss_align": 1.2665932178497314, "test_loss_unif": 3.8398215770721436, "test_loss_unif_q@queue": 3.8398215770721436, "test_norm_diff": 0.13810236752033234, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.26778310537338257, "test_query_norm": 1.5046560764312744, "test_queue_k_norm": 1.4014105796813965, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04048263281583786, "test_stdq": 0.0419442355632782, "test_stdqueue_k": 0.04700257629156113, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.7905, "dev_samples_per_second": 2.303, "dev_steps_per_second": 0.036, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.32694, "eval_beir-arguana_recall@10": 0.55761, "eval_beir-arguana_recall@100": 0.87696, "eval_beir-arguana_recall@20": 0.70484, "eval_beir-avg_ndcg@10": 0.31162975, "eval_beir-avg_recall@10": 0.3775988333333333, "eval_beir-avg_recall@100": 0.5661563333333334, "eval_beir-avg_recall@20": 0.43946033333333334, "eval_beir-cqadupstack_ndcg@10": 0.21029750000000003, "eval_beir-cqadupstack_recall@10": 0.29125833333333334, "eval_beir-cqadupstack_recall@100": 0.5107633333333333, "eval_beir-cqadupstack_recall@20": 0.35362333333333335, "eval_beir-fiqa_ndcg@10": 0.16741, "eval_beir-fiqa_recall@10": 0.22279, "eval_beir-fiqa_recall@100": 0.46563, "eval_beir-fiqa_recall@20": 0.28083, "eval_beir-nfcorpus_ndcg@10": 0.26017, "eval_beir-nfcorpus_recall@10": 0.12101, "eval_beir-nfcorpus_recall@100": 0.24898, "eval_beir-nfcorpus_recall@20": 0.15134, "eval_beir-nq_ndcg@10": 0.20981, "eval_beir-nq_recall@10": 0.3497, "eval_beir-nq_recall@100": 0.68212, "eval_beir-nq_recall@20": 0.45985, "eval_beir-quora_ndcg@10": 0.65983, "eval_beir-quora_recall@10": 0.79763, "eval_beir-quora_recall@100": 0.94982, "eval_beir-quora_recall@20": 0.86132, "eval_beir-scidocs_ndcg@10": 0.11946, "eval_beir-scidocs_recall@10": 0.12903, "eval_beir-scidocs_recall@100": 0.31602, "eval_beir-scidocs_recall@20": 0.17677, "eval_beir-scifact_ndcg@10": 0.56354, "eval_beir-scifact_recall@10": 0.72683, "eval_beir-scifact_recall@100": 0.88422, "eval_beir-scifact_recall@20": 0.79111, "eval_beir-trec-covid_ndcg@10": 0.42591, "eval_beir-trec-covid_recall@10": 0.46, "eval_beir-trec-covid_recall@100": 0.3268, "eval_beir-trec-covid_recall@20": 0.441, "eval_beir-webis-touche2020_ndcg@10": 0.17293, "eval_beir-webis-touche2020_recall@10": 0.12013, "eval_beir-webis-touche2020_recall@100": 0.40025, "eval_beir-webis-touche2020_recall@20": 0.17392, "eval_senteval-avg_sts": 0.7321114459830499, "eval_senteval-sickr_spearman": 0.7122766904343758, "eval_senteval-stsb_spearman": 0.751946201531724, "step": 20000, "test_accuracy": 91.69921875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5058528184890747, "test_doc_norm": 1.3665536642074585, "test_inbatch_neg_score": 0.5986124873161316, "test_inbatch_pos_score": 1.4486560821533203, "test_loss": 0.5058528184890747, "test_loss_align": 1.2665932178497314, "test_loss_unif": 3.8398215770721436, "test_loss_unif_q@queue": 3.8398215770721436, "test_norm_diff": 0.13810236752033234, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.26778310537338257, "test_query_norm": 1.5046560764312744, "test_queue_k_norm": 1.4014105796813965, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04048263281583786, "test_stdq": 0.0419442355632782, "test_stdqueue_k": 0.04700257629156113, "test_stdqueue_q": 0.0 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.8845, "doc_norm": 1.405, "encoder_q-embeddings": 1068.5991, "encoder_q-layer.0": 776.7203, "encoder_q-layer.1": 814.7473, "encoder_q-layer.10": 704.7567, "encoder_q-layer.11": 1894.657, "encoder_q-layer.2": 853.3231, "encoder_q-layer.3": 825.7357, "encoder_q-layer.4": 823.7223, "encoder_q-layer.5": 766.9361, "encoder_q-layer.6": 785.451, "encoder_q-layer.7": 754.3622, "encoder_q-layer.8": 739.5257, "encoder_q-layer.9": 631.4193, "epoch": 0.2, "inbatch_neg_score": 0.2442, "inbatch_pos_score": 0.8423, "learning_rate": 4.438888888888889e-05, "loss": 3.8845, "norm_diff": 0.0128, "norm_loss": 0.0, "num_token_doc": 66.8359, "num_token_overlap": 14.5764, "num_token_query": 37.4542, "num_token_union": 65.4868, "num_word_context": 202.572, "num_word_doc": 49.8823, "num_word_query": 28.0569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1484.3297, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2452, "query_norm": 1.3927, "queue_k_norm": 1.4039, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4542, "sent_len_1": 66.8359, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.055, "stdk": 0.047, "stdq": 0.0441, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.8836, "doc_norm": 1.4018, "encoder_q-embeddings": 5399.7495, "encoder_q-layer.0": 4007.9543, "encoder_q-layer.1": 4076.7224, "encoder_q-layer.10": 826.9344, "encoder_q-layer.11": 2112.094, "encoder_q-layer.2": 4455.3701, "encoder_q-layer.3": 4792.543, "encoder_q-layer.4": 4644.4346, "encoder_q-layer.5": 4072.9316, "encoder_q-layer.6": 4387.7549, "encoder_q-layer.7": 4749.4448, "encoder_q-layer.8": 2332.0742, "encoder_q-layer.9": 812.4346, "epoch": 0.2, "inbatch_neg_score": 0.2344, "inbatch_pos_score": 0.8027, "learning_rate": 4.433333333333334e-05, "loss": 3.8836, "norm_diff": 0.0439, "norm_loss": 0.0, "num_token_doc": 66.7243, "num_token_overlap": 14.5945, "num_token_query": 37.3543, "num_token_union": 65.3803, "num_word_context": 202.0342, "num_word_doc": 49.7777, "num_word_query": 27.9552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5966.9119, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2343, "query_norm": 1.3579, "queue_k_norm": 1.4032, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3543, "sent_len_1": 66.7243, "sent_len_max_0": 128.0, "sent_len_max_1": 191.865, "stdk": 0.047, "stdq": 0.0429, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8614, "doc_norm": 1.4039, "encoder_q-embeddings": 3682.8962, "encoder_q-layer.0": 2619.1228, "encoder_q-layer.1": 2682.9768, "encoder_q-layer.10": 739.2211, "encoder_q-layer.11": 1895.6587, "encoder_q-layer.2": 2851.1958, "encoder_q-layer.3": 3153.7961, "encoder_q-layer.4": 3225.1277, "encoder_q-layer.5": 2877.9114, "encoder_q-layer.6": 2354.6758, "encoder_q-layer.7": 1589.8369, "encoder_q-layer.8": 1233.504, "encoder_q-layer.9": 669.0292, "epoch": 0.2, "inbatch_neg_score": 0.2182, "inbatch_pos_score": 0.8091, "learning_rate": 4.427777777777778e-05, "loss": 3.8614, "norm_diff": 0.0277, "norm_loss": 0.0, "num_token_doc": 66.9642, "num_token_overlap": 14.4658, "num_token_query": 37.0473, "num_token_union": 65.4146, "num_word_context": 202.3514, "num_word_doc": 49.9446, "num_word_query": 27.7062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3742.9815, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2179, "query_norm": 1.3762, "queue_k_norm": 1.4043, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.0473, "sent_len_1": 66.9642, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.9387, "stdk": 0.0471, "stdq": 0.0436, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.8219, "doc_norm": 1.4032, "encoder_q-embeddings": 3580.1189, "encoder_q-layer.0": 2506.3582, "encoder_q-layer.1": 2908.8445, "encoder_q-layer.10": 823.0052, "encoder_q-layer.11": 2057.866, "encoder_q-layer.2": 3348.8374, "encoder_q-layer.3": 3821.2605, "encoder_q-layer.4": 3929.7024, "encoder_q-layer.5": 3993.7063, "encoder_q-layer.6": 3799.6768, "encoder_q-layer.7": 3578.6975, "encoder_q-layer.8": 2720.2756, "encoder_q-layer.9": 938.6234, "epoch": 0.2, "inbatch_neg_score": 0.2238, "inbatch_pos_score": 0.7896, "learning_rate": 4.422222222222222e-05, "loss": 3.8219, "norm_diff": 0.052, "norm_loss": 0.0, "num_token_doc": 66.9519, "num_token_overlap": 14.6685, "num_token_query": 37.4698, "num_token_union": 65.5219, "num_word_context": 202.3823, "num_word_doc": 49.9861, "num_word_query": 28.0634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4583.9585, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2227, "query_norm": 1.3512, "queue_k_norm": 1.4029, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4698, "sent_len_1": 66.9519, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.9613, "stdk": 0.0471, "stdq": 0.0428, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.8476, "doc_norm": 1.3973, "encoder_q-embeddings": 1575.3448, "encoder_q-layer.0": 1009.4966, "encoder_q-layer.1": 1214.3668, "encoder_q-layer.10": 769.4449, "encoder_q-layer.11": 1931.4113, "encoder_q-layer.2": 1262.9933, "encoder_q-layer.3": 1371.2791, "encoder_q-layer.4": 1332.4447, "encoder_q-layer.5": 1062.3842, "encoder_q-layer.6": 961.0316, "encoder_q-layer.7": 860.1937, "encoder_q-layer.8": 856.7595, "encoder_q-layer.9": 652.3909, "epoch": 0.2, "inbatch_neg_score": 0.2253, "inbatch_pos_score": 0.8091, "learning_rate": 4.4166666666666665e-05, "loss": 3.8476, "norm_diff": 0.0199, "norm_loss": 0.0, "num_token_doc": 66.6608, "num_token_overlap": 14.5658, "num_token_query": 37.4202, "num_token_union": 65.3721, "num_word_context": 202.2522, "num_word_doc": 49.7422, "num_word_query": 28.0068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1856.7624, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2244, "query_norm": 1.3817, "queue_k_norm": 1.4011, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4202, "sent_len_1": 66.6608, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6225, "stdk": 0.047, "stdq": 0.044, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8435, "doc_norm": 1.4047, "encoder_q-embeddings": 16813.4453, "encoder_q-layer.0": 12161.9482, "encoder_q-layer.1": 14105.3574, "encoder_q-layer.10": 732.1589, "encoder_q-layer.11": 1960.8506, "encoder_q-layer.2": 17009.418, "encoder_q-layer.3": 17557.3906, "encoder_q-layer.4": 16060.5674, "encoder_q-layer.5": 14181.9512, "encoder_q-layer.6": 12039.9766, "encoder_q-layer.7": 10542.5117, "encoder_q-layer.8": 4044.3875, "encoder_q-layer.9": 1105.3192, "epoch": 0.2, "inbatch_neg_score": 0.2358, "inbatch_pos_score": 0.8354, "learning_rate": 4.4111111111111114e-05, "loss": 3.8435, "norm_diff": 0.019, "norm_loss": 0.0, "num_token_doc": 66.6488, "num_token_overlap": 14.5733, "num_token_query": 37.2283, "num_token_union": 65.2939, "num_word_context": 202.1344, "num_word_doc": 49.7485, "num_word_query": 27.8789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18678.2368, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2355, "query_norm": 1.4044, "queue_k_norm": 1.399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2283, "sent_len_1": 66.6488, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0188, "stdk": 0.0473, "stdq": 0.0444, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.8316, "doc_norm": 1.4038, "encoder_q-embeddings": 887.5854, "encoder_q-layer.0": 602.0687, "encoder_q-layer.1": 653.4517, "encoder_q-layer.10": 747.2911, "encoder_q-layer.11": 2059.9119, "encoder_q-layer.2": 721.4873, "encoder_q-layer.3": 800.6903, "encoder_q-layer.4": 827.0554, "encoder_q-layer.5": 748.8171, "encoder_q-layer.6": 769.6125, "encoder_q-layer.7": 742.3337, "encoder_q-layer.8": 785.0702, "encoder_q-layer.9": 675.7838, "epoch": 0.2, "inbatch_neg_score": 0.2339, "inbatch_pos_score": 0.856, "learning_rate": 4.4055555555555557e-05, "loss": 3.8316, "norm_diff": 0.0212, "norm_loss": 0.0, "num_token_doc": 66.876, "num_token_overlap": 14.6242, "num_token_query": 37.2372, "num_token_union": 65.346, "num_word_context": 202.1977, "num_word_doc": 49.8889, "num_word_query": 27.8804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1435.6097, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2341, "query_norm": 1.4128, "queue_k_norm": 1.4004, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2372, "sent_len_1": 66.876, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.6863, "stdk": 0.0473, "stdq": 0.0447, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8695, "doc_norm": 1.3993, "encoder_q-embeddings": 2103.823, "encoder_q-layer.0": 1429.4817, "encoder_q-layer.1": 1552.8588, "encoder_q-layer.10": 690.683, "encoder_q-layer.11": 1905.2089, "encoder_q-layer.2": 1754.9127, "encoder_q-layer.3": 1820.2592, "encoder_q-layer.4": 1849.0999, "encoder_q-layer.5": 1467.6956, "encoder_q-layer.6": 1427.4507, "encoder_q-layer.7": 1223.3889, "encoder_q-layer.8": 1058.0071, "encoder_q-layer.9": 742.7568, "epoch": 0.2, "inbatch_neg_score": 0.2425, "inbatch_pos_score": 0.8384, "learning_rate": 4.4000000000000006e-05, "loss": 3.8695, "norm_diff": 0.0191, "norm_loss": 0.0, "num_token_doc": 66.6485, "num_token_overlap": 14.5821, "num_token_query": 37.2412, "num_token_union": 65.2537, "num_word_context": 202.2124, "num_word_doc": 49.7193, "num_word_query": 27.872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2361.0182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.241, "query_norm": 1.4165, "queue_k_norm": 1.4004, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2412, "sent_len_1": 66.6485, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.84, "stdk": 0.0471, "stdq": 0.0447, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.8437, "doc_norm": 1.3956, "encoder_q-embeddings": 5256.7739, "encoder_q-layer.0": 4200.7114, "encoder_q-layer.1": 4712.1689, "encoder_q-layer.10": 681.6948, "encoder_q-layer.11": 1900.9344, "encoder_q-layer.2": 4508.3848, "encoder_q-layer.3": 4834.1514, "encoder_q-layer.4": 4993.9448, "encoder_q-layer.5": 4572.5679, "encoder_q-layer.6": 4334.0322, "encoder_q-layer.7": 4009.0662, "encoder_q-layer.8": 2651.761, "encoder_q-layer.9": 978.7751, "epoch": 0.2, "inbatch_neg_score": 0.2428, "inbatch_pos_score": 0.8096, "learning_rate": 4.394444444444445e-05, "loss": 3.8437, "norm_diff": 0.0115, "norm_loss": 0.0, "num_token_doc": 67.0919, "num_token_overlap": 14.6371, "num_token_query": 37.2553, "num_token_union": 65.4726, "num_word_context": 202.7763, "num_word_doc": 50.1162, "num_word_query": 27.8957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6243.7606, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2435, "query_norm": 1.3966, "queue_k_norm": 1.4016, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2553, "sent_len_1": 67.0919, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1825, "stdk": 0.047, "stdq": 0.0436, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.84, "doc_norm": 1.4059, "encoder_q-embeddings": 2981.9531, "encoder_q-layer.0": 2170.4414, "encoder_q-layer.1": 2361.7192, "encoder_q-layer.10": 694.293, "encoder_q-layer.11": 1982.0341, "encoder_q-layer.2": 2684.9407, "encoder_q-layer.3": 3084.4924, "encoder_q-layer.4": 3414.6082, "encoder_q-layer.5": 2920.3899, "encoder_q-layer.6": 2574.7437, "encoder_q-layer.7": 2302.1946, "encoder_q-layer.8": 1270.512, "encoder_q-layer.9": 700.2852, "epoch": 0.21, "inbatch_neg_score": 0.2597, "inbatch_pos_score": 0.8394, "learning_rate": 4.388888888888889e-05, "loss": 3.84, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.7805, "num_token_overlap": 14.5787, "num_token_query": 37.3196, "num_token_union": 65.432, "num_word_context": 202.3613, "num_word_doc": 49.841, "num_word_query": 27.9333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3747.8114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.259, "query_norm": 1.4282, "queue_k_norm": 1.4013, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3196, "sent_len_1": 66.7805, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2175, "stdk": 0.0474, "stdq": 0.0438, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.8546, "doc_norm": 1.401, "encoder_q-embeddings": 733.8289, "encoder_q-layer.0": 496.8892, "encoder_q-layer.1": 551.8862, "encoder_q-layer.10": 685.2326, "encoder_q-layer.11": 1980.8796, "encoder_q-layer.2": 582.0969, "encoder_q-layer.3": 614.6031, "encoder_q-layer.4": 641.9637, "encoder_q-layer.5": 558.9004, "encoder_q-layer.6": 591.9385, "encoder_q-layer.7": 641.9857, "encoder_q-layer.8": 707.2243, "encoder_q-layer.9": 609.3514, "epoch": 0.21, "inbatch_neg_score": 0.2636, "inbatch_pos_score": 0.8438, "learning_rate": 4.383333333333334e-05, "loss": 3.8546, "norm_diff": 0.009, "norm_loss": 0.0, "num_token_doc": 66.6568, "num_token_overlap": 14.577, "num_token_query": 37.299, "num_token_union": 65.2709, "num_word_context": 202.0233, "num_word_doc": 49.6893, "num_word_query": 27.9079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1295.6055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2639, "query_norm": 1.3964, "queue_k_norm": 1.4003, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.299, "sent_len_1": 66.6568, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2825, "stdk": 0.0472, "stdq": 0.0431, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.8532, "doc_norm": 1.3998, "encoder_q-embeddings": 3874.5835, "encoder_q-layer.0": 2718.1548, "encoder_q-layer.1": 2792.1807, "encoder_q-layer.10": 882.543, "encoder_q-layer.11": 1978.2393, "encoder_q-layer.2": 3088.0139, "encoder_q-layer.3": 3185.2207, "encoder_q-layer.4": 4061.4106, "encoder_q-layer.5": 3818.0969, "encoder_q-layer.6": 3640.6982, "encoder_q-layer.7": 3162.939, "encoder_q-layer.8": 2395.4709, "encoder_q-layer.9": 995.7739, "epoch": 0.21, "inbatch_neg_score": 0.2577, "inbatch_pos_score": 0.8599, "learning_rate": 4.377777777777778e-05, "loss": 3.8532, "norm_diff": 0.0198, "norm_loss": 0.0, "num_token_doc": 66.7723, "num_token_overlap": 14.6128, "num_token_query": 37.4229, "num_token_union": 65.4509, "num_word_context": 202.4405, "num_word_doc": 49.8752, "num_word_query": 28.0451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4515.4676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2576, "query_norm": 1.4188, "queue_k_norm": 1.4032, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4229, "sent_len_1": 66.7723, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.505, "stdk": 0.0471, "stdq": 0.0446, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.8381, "doc_norm": 1.3987, "encoder_q-embeddings": 3099.4624, "encoder_q-layer.0": 2124.6494, "encoder_q-layer.1": 2513.3879, "encoder_q-layer.10": 665.8329, "encoder_q-layer.11": 1796.9886, "encoder_q-layer.2": 2832.4854, "encoder_q-layer.3": 2788.936, "encoder_q-layer.4": 2932.9993, "encoder_q-layer.5": 2838.625, "encoder_q-layer.6": 3002.4971, "encoder_q-layer.7": 2690.625, "encoder_q-layer.8": 1755.923, "encoder_q-layer.9": 780.6992, "epoch": 0.21, "inbatch_neg_score": 0.2545, "inbatch_pos_score": 0.8574, "learning_rate": 4.3722222222222224e-05, "loss": 3.8381, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.778, "num_token_overlap": 14.6458, "num_token_query": 37.2968, "num_token_union": 65.2788, "num_word_context": 202.2394, "num_word_doc": 49.8149, "num_word_query": 27.9217, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3632.961, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2534, "query_norm": 1.4083, "queue_k_norm": 1.4033, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2968, "sent_len_1": 66.778, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7637, "stdk": 0.0471, "stdq": 0.0446, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.8311, "doc_norm": 1.3998, "encoder_q-embeddings": 2150.6799, "encoder_q-layer.0": 1586.3826, "encoder_q-layer.1": 1626.8964, "encoder_q-layer.10": 621.1028, "encoder_q-layer.11": 1826.2589, "encoder_q-layer.2": 1806.719, "encoder_q-layer.3": 1619.6941, "encoder_q-layer.4": 1714.5273, "encoder_q-layer.5": 1633.6173, "encoder_q-layer.6": 1513.0604, "encoder_q-layer.7": 1363.0132, "encoder_q-layer.8": 1006.4201, "encoder_q-layer.9": 622.6795, "epoch": 0.21, "inbatch_neg_score": 0.2568, "inbatch_pos_score": 0.8447, "learning_rate": 4.3666666666666666e-05, "loss": 3.8311, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.5514, "num_token_overlap": 14.5952, "num_token_query": 37.4393, "num_token_union": 65.2844, "num_word_context": 201.9326, "num_word_doc": 49.6949, "num_word_query": 28.035, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2356.9084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.3842, "queue_k_norm": 1.404, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4393, "sent_len_1": 66.5514, "sent_len_max_0": 128.0, "sent_len_max_1": 188.13, "stdk": 0.0471, "stdq": 0.0436, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8354, "doc_norm": 1.3979, "encoder_q-embeddings": 12531.1152, "encoder_q-layer.0": 9609.2666, "encoder_q-layer.1": 10736.2715, "encoder_q-layer.10": 835.8004, "encoder_q-layer.11": 2132.896, "encoder_q-layer.2": 12080.7969, "encoder_q-layer.3": 11809.0791, "encoder_q-layer.4": 11602.7959, "encoder_q-layer.5": 10459.8506, "encoder_q-layer.6": 8790.6035, "encoder_q-layer.7": 5723.3184, "encoder_q-layer.8": 3274.8379, "encoder_q-layer.9": 1008.0854, "epoch": 0.21, "inbatch_neg_score": 0.2554, "inbatch_pos_score": 0.8267, "learning_rate": 4.3611111111111116e-05, "loss": 3.8354, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.8259, "num_token_overlap": 14.5125, "num_token_query": 37.1548, "num_token_union": 65.3677, "num_word_context": 202.2893, "num_word_doc": 49.8855, "num_word_query": 27.8038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13625.807, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2561, "query_norm": 1.3927, "queue_k_norm": 1.4037, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1548, "sent_len_1": 66.8259, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0062, "stdk": 0.047, "stdq": 0.0438, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.8299, "doc_norm": 1.409, "encoder_q-embeddings": 4968.9077, "encoder_q-layer.0": 3868.5017, "encoder_q-layer.1": 4741.0986, "encoder_q-layer.10": 759.7056, "encoder_q-layer.11": 1907.8406, "encoder_q-layer.2": 4372.2568, "encoder_q-layer.3": 4258.2847, "encoder_q-layer.4": 4092.0034, "encoder_q-layer.5": 3602.4412, "encoder_q-layer.6": 3013.4487, "encoder_q-layer.7": 2033.646, "encoder_q-layer.8": 1365.8561, "encoder_q-layer.9": 737.5704, "epoch": 0.21, "inbatch_neg_score": 0.252, "inbatch_pos_score": 0.8711, "learning_rate": 4.355555555555556e-05, "loss": 3.8299, "norm_diff": 0.0163, "norm_loss": 0.0, "num_token_doc": 66.8798, "num_token_overlap": 14.6414, "num_token_query": 37.5287, "num_token_union": 65.5257, "num_word_context": 202.2254, "num_word_doc": 49.8878, "num_word_query": 28.1073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5276.8318, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2522, "query_norm": 1.4232, "queue_k_norm": 1.404, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5287, "sent_len_1": 66.8798, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.655, "stdk": 0.0475, "stdq": 0.0451, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.8283, "doc_norm": 1.4008, "encoder_q-embeddings": 14011.8418, "encoder_q-layer.0": 9984.8613, "encoder_q-layer.1": 8245.9521, "encoder_q-layer.10": 812.577, "encoder_q-layer.11": 2188.5146, "encoder_q-layer.2": 6126.5259, "encoder_q-layer.3": 4338.7031, "encoder_q-layer.4": 3268.0396, "encoder_q-layer.5": 2707.6338, "encoder_q-layer.6": 2502.4375, "encoder_q-layer.7": 2186.2942, "encoder_q-layer.8": 1558.2987, "encoder_q-layer.9": 739.115, "epoch": 0.21, "inbatch_neg_score": 0.2491, "inbatch_pos_score": 0.8281, "learning_rate": 4.35e-05, "loss": 3.8283, "norm_diff": 0.0225, "norm_loss": 0.0, "num_token_doc": 66.6979, "num_token_overlap": 14.6673, "num_token_query": 37.5054, "num_token_union": 65.3634, "num_word_context": 202.3514, "num_word_doc": 49.7846, "num_word_query": 28.0851, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10082.7824, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2484, "query_norm": 1.4123, "queue_k_norm": 1.4031, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5054, "sent_len_1": 66.6979, "sent_len_max_0": 128.0, "sent_len_max_1": 188.155, "stdk": 0.0471, "stdq": 0.0446, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8699, "doc_norm": 1.4045, "encoder_q-embeddings": 2540.5793, "encoder_q-layer.0": 1790.8892, "encoder_q-layer.1": 1905.9585, "encoder_q-layer.10": 765.616, "encoder_q-layer.11": 2000.6035, "encoder_q-layer.2": 2299.7942, "encoder_q-layer.3": 2443.1453, "encoder_q-layer.4": 2827.2812, "encoder_q-layer.5": 2737.3801, "encoder_q-layer.6": 2905.4351, "encoder_q-layer.7": 2404.3396, "encoder_q-layer.8": 2195.3733, "encoder_q-layer.9": 1048.4338, "epoch": 0.21, "inbatch_neg_score": 0.2451, "inbatch_pos_score": 0.8423, "learning_rate": 4.344444444444445e-05, "loss": 3.8699, "norm_diff": 0.0104, "norm_loss": 0.0, "num_token_doc": 66.7711, "num_token_overlap": 14.5595, "num_token_query": 37.067, "num_token_union": 65.2759, "num_word_context": 202.2496, "num_word_doc": 49.8242, "num_word_query": 27.7443, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3319.9314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2446, "query_norm": 1.403, "queue_k_norm": 1.4048, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.067, "sent_len_1": 66.7711, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3063, "stdk": 0.0473, "stdq": 0.0444, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.8745, "doc_norm": 1.3974, "encoder_q-embeddings": 926.1231, "encoder_q-layer.0": 686.4853, "encoder_q-layer.1": 722.4475, "encoder_q-layer.10": 690.3052, "encoder_q-layer.11": 1744.5564, "encoder_q-layer.2": 774.4851, "encoder_q-layer.3": 850.8234, "encoder_q-layer.4": 847.3915, "encoder_q-layer.5": 880.2211, "encoder_q-layer.6": 857.8146, "encoder_q-layer.7": 776.9938, "encoder_q-layer.8": 776.6971, "encoder_q-layer.9": 640.3019, "epoch": 0.21, "inbatch_neg_score": 0.2238, "inbatch_pos_score": 0.8247, "learning_rate": 4.338888888888889e-05, "loss": 3.8745, "norm_diff": 0.0275, "norm_loss": 0.0, "num_token_doc": 66.9019, "num_token_overlap": 14.5422, "num_token_query": 37.171, "num_token_union": 65.4057, "num_word_context": 202.8188, "num_word_doc": 49.9378, "num_word_query": 27.8229, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1349.6065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2229, "query_norm": 1.4245, "queue_k_norm": 1.4038, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.171, "sent_len_1": 66.9019, "sent_len_max_0": 128.0, "sent_len_max_1": 188.415, "stdk": 0.047, "stdq": 0.0451, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.8332, "doc_norm": 1.4057, "encoder_q-embeddings": 1585.0956, "encoder_q-layer.0": 1082.583, "encoder_q-layer.1": 1108.4187, "encoder_q-layer.10": 1402.9036, "encoder_q-layer.11": 3850.5552, "encoder_q-layer.2": 1216.0266, "encoder_q-layer.3": 1352.5463, "encoder_q-layer.4": 1394.1609, "encoder_q-layer.5": 1312.197, "encoder_q-layer.6": 1468.5812, "encoder_q-layer.7": 1541.3314, "encoder_q-layer.8": 1677.5803, "encoder_q-layer.9": 1356.4991, "epoch": 0.21, "inbatch_neg_score": 0.2141, "inbatch_pos_score": 0.8047, "learning_rate": 4.3333333333333334e-05, "loss": 3.8332, "norm_diff": 0.0257, "norm_loss": 0.0, "num_token_doc": 66.8979, "num_token_overlap": 14.7055, "num_token_query": 37.3987, "num_token_union": 65.4337, "num_word_context": 202.6697, "num_word_doc": 49.9556, "num_word_query": 27.977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2549.3448, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2126, "query_norm": 1.4314, "queue_k_norm": 1.4041, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3987, "sent_len_1": 66.8979, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.9487, "stdk": 0.0474, "stdq": 0.0452, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8498, "doc_norm": 1.4105, "encoder_q-embeddings": 24405.3809, "encoder_q-layer.0": 18198.2363, "encoder_q-layer.1": 19532.5176, "encoder_q-layer.10": 771.3773, "encoder_q-layer.11": 1885.0872, "encoder_q-layer.2": 20812.2109, "encoder_q-layer.3": 16245.5342, "encoder_q-layer.4": 12390.7354, "encoder_q-layer.5": 8556.8584, "encoder_q-layer.6": 6990.7017, "encoder_q-layer.7": 4595.0679, "encoder_q-layer.8": 2193.3792, "encoder_q-layer.9": 693.8351, "epoch": 0.22, "inbatch_neg_score": 0.2083, "inbatch_pos_score": 0.8203, "learning_rate": 4.3277777777777776e-05, "loss": 3.8498, "norm_diff": 0.0314, "norm_loss": 0.0, "num_token_doc": 66.7756, "num_token_overlap": 14.5498, "num_token_query": 37.2547, "num_token_union": 65.3195, "num_word_context": 201.8354, "num_word_doc": 49.7832, "num_word_query": 27.8824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21706.4231, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2083, "query_norm": 1.4418, "queue_k_norm": 1.405, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2547, "sent_len_1": 66.7756, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.6113, "stdk": 0.0476, "stdq": 0.0448, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8221, "doc_norm": 1.4021, "encoder_q-embeddings": 990.768, "encoder_q-layer.0": 715.9697, "encoder_q-layer.1": 773.9874, "encoder_q-layer.10": 686.8132, "encoder_q-layer.11": 1821.7238, "encoder_q-layer.2": 875.5954, "encoder_q-layer.3": 992.4236, "encoder_q-layer.4": 940.995, "encoder_q-layer.5": 859.174, "encoder_q-layer.6": 951.5315, "encoder_q-layer.7": 972.645, "encoder_q-layer.8": 940.0404, "encoder_q-layer.9": 663.7981, "epoch": 0.22, "inbatch_neg_score": 0.2249, "inbatch_pos_score": 0.8267, "learning_rate": 4.3222222222222226e-05, "loss": 3.8221, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.7592, "num_token_overlap": 14.6956, "num_token_query": 37.579, "num_token_union": 65.4459, "num_word_context": 202.4671, "num_word_doc": 49.8309, "num_word_query": 28.1581, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1494.0702, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2261, "query_norm": 1.4389, "queue_k_norm": 1.4075, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.579, "sent_len_1": 66.7592, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5213, "stdk": 0.0473, "stdq": 0.0447, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8364, "doc_norm": 1.4044, "encoder_q-embeddings": 1160.9673, "encoder_q-layer.0": 818.0028, "encoder_q-layer.1": 809.7254, "encoder_q-layer.10": 945.0004, "encoder_q-layer.11": 2090.6387, "encoder_q-layer.2": 861.9683, "encoder_q-layer.3": 882.116, "encoder_q-layer.4": 904.6599, "encoder_q-layer.5": 822.2444, "encoder_q-layer.6": 879.9556, "encoder_q-layer.7": 913.8112, "encoder_q-layer.8": 978.6409, "encoder_q-layer.9": 740.4483, "epoch": 0.22, "inbatch_neg_score": 0.2207, "inbatch_pos_score": 0.7881, "learning_rate": 4.316666666666667e-05, "loss": 3.8364, "norm_diff": 0.0148, "norm_loss": 0.0, "num_token_doc": 66.9059, "num_token_overlap": 14.5538, "num_token_query": 37.2452, "num_token_union": 65.4095, "num_word_context": 202.4279, "num_word_doc": 49.9, "num_word_query": 27.8894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1558.1694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2196, "query_norm": 1.3986, "queue_k_norm": 1.4068, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2452, "sent_len_1": 66.9059, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2287, "stdk": 0.0474, "stdq": 0.0435, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.8417, "doc_norm": 1.4144, "encoder_q-embeddings": 6553.4937, "encoder_q-layer.0": 4696.0625, "encoder_q-layer.1": 5075.7515, "encoder_q-layer.10": 817.8777, "encoder_q-layer.11": 1827.7252, "encoder_q-layer.2": 6203.1489, "encoder_q-layer.3": 7440.0283, "encoder_q-layer.4": 9863.2207, "encoder_q-layer.5": 9804.4844, "encoder_q-layer.6": 7050.835, "encoder_q-layer.7": 3208.0432, "encoder_q-layer.8": 1294.7959, "encoder_q-layer.9": 787.7654, "epoch": 0.22, "inbatch_neg_score": 0.2174, "inbatch_pos_score": 0.8105, "learning_rate": 4.311111111111111e-05, "loss": 3.8417, "norm_diff": 0.0092, "norm_loss": 0.0, "num_token_doc": 66.816, "num_token_overlap": 14.6073, "num_token_query": 37.3637, "num_token_union": 65.4277, "num_word_context": 202.3775, "num_word_doc": 49.8629, "num_word_query": 27.969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8468.0863, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2155, "query_norm": 1.4189, "queue_k_norm": 1.4063, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3637, "sent_len_1": 66.816, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.64, "stdk": 0.0478, "stdq": 0.0446, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.8242, "doc_norm": 1.4091, "encoder_q-embeddings": 30549.1406, "encoder_q-layer.0": 24274.1406, "encoder_q-layer.1": 22598.3418, "encoder_q-layer.10": 1495.0096, "encoder_q-layer.11": 1708.61, "encoder_q-layer.2": 31324.8887, "encoder_q-layer.3": 40148.457, "encoder_q-layer.4": 34485.3828, "encoder_q-layer.5": 37479.9805, "encoder_q-layer.6": 36838.0703, "encoder_q-layer.7": 40665.3711, "encoder_q-layer.8": 36857.0273, "encoder_q-layer.9": 10310.4785, "epoch": 0.22, "inbatch_neg_score": 0.2241, "inbatch_pos_score": 0.8315, "learning_rate": 4.305555555555556e-05, "loss": 3.8242, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 67.1421, "num_token_overlap": 14.6143, "num_token_query": 37.3271, "num_token_union": 65.5522, "num_word_context": 202.713, "num_word_doc": 50.0868, "num_word_query": 27.9264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 44152.4629, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.2251, "query_norm": 1.4264, "queue_k_norm": 1.408, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3271, "sent_len_1": 67.1421, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9525, "stdk": 0.0476, "stdq": 0.0449, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.8236, "doc_norm": 1.4055, "encoder_q-embeddings": 1306.1053, "encoder_q-layer.0": 900.1234, "encoder_q-layer.1": 1014.6685, "encoder_q-layer.10": 669.9935, "encoder_q-layer.11": 1733.6326, "encoder_q-layer.2": 1193.1154, "encoder_q-layer.3": 1305.9786, "encoder_q-layer.4": 1457.4581, "encoder_q-layer.5": 1505.5057, "encoder_q-layer.6": 1352.5341, "encoder_q-layer.7": 1044.2728, "encoder_q-layer.8": 795.5836, "encoder_q-layer.9": 593.1055, "epoch": 0.22, "inbatch_neg_score": 0.2348, "inbatch_pos_score": 0.8081, "learning_rate": 4.3e-05, "loss": 3.8236, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.8266, "num_token_overlap": 14.5463, "num_token_query": 37.1883, "num_token_union": 65.3103, "num_word_context": 202.2463, "num_word_doc": 49.8492, "num_word_query": 27.82, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1797.1129, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2347, "query_norm": 1.3944, "queue_k_norm": 1.4101, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1883, "sent_len_1": 66.8266, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.6075, "stdk": 0.0475, "stdq": 0.0432, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.8239, "doc_norm": 1.4114, "encoder_q-embeddings": 2146.2598, "encoder_q-layer.0": 1698.5419, "encoder_q-layer.1": 1893.2285, "encoder_q-layer.10": 368.4297, "encoder_q-layer.11": 1048.3591, "encoder_q-layer.2": 1898.0887, "encoder_q-layer.3": 1890.4038, "encoder_q-layer.4": 1881.3541, "encoder_q-layer.5": 1687.9446, "encoder_q-layer.6": 1520.9849, "encoder_q-layer.7": 1218.265, "encoder_q-layer.8": 844.1066, "encoder_q-layer.9": 384.1333, "epoch": 0.22, "inbatch_neg_score": 0.2325, "inbatch_pos_score": 0.8115, "learning_rate": 4.294444444444445e-05, "loss": 3.8239, "norm_diff": 0.0206, "norm_loss": 0.0, "num_token_doc": 66.663, "num_token_overlap": 14.5391, "num_token_query": 37.2151, "num_token_union": 65.3029, "num_word_context": 202.3776, "num_word_doc": 49.7927, "num_word_query": 27.8788, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2327.5979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.231, "query_norm": 1.3922, "queue_k_norm": 1.4115, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2151, "sent_len_1": 66.663, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.95, "stdk": 0.0477, "stdq": 0.0429, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.8383, "doc_norm": 1.4064, "encoder_q-embeddings": 2456.8103, "encoder_q-layer.0": 1734.5902, "encoder_q-layer.1": 1899.4772, "encoder_q-layer.10": 356.1285, "encoder_q-layer.11": 1057.8079, "encoder_q-layer.2": 2109.791, "encoder_q-layer.3": 2125.7346, "encoder_q-layer.4": 2149.8237, "encoder_q-layer.5": 1921.5934, "encoder_q-layer.6": 1730.17, "encoder_q-layer.7": 1538.0264, "encoder_q-layer.8": 1090.4591, "encoder_q-layer.9": 374.4117, "epoch": 0.22, "inbatch_neg_score": 0.2266, "inbatch_pos_score": 0.7974, "learning_rate": 4.2888888888888886e-05, "loss": 3.8383, "norm_diff": 0.0173, "norm_loss": 0.0, "num_token_doc": 66.9212, "num_token_overlap": 14.5734, "num_token_query": 37.2515, "num_token_union": 65.4736, "num_word_context": 202.4089, "num_word_doc": 49.9203, "num_word_query": 27.8729, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2646.4654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2283, "query_norm": 1.3893, "queue_k_norm": 1.4098, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2515, "sent_len_1": 66.9212, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2113, "stdk": 0.0476, "stdq": 0.0434, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.8325, "doc_norm": 1.4145, "encoder_q-embeddings": 1972.347, "encoder_q-layer.0": 1429.546, "encoder_q-layer.1": 1674.7832, "encoder_q-layer.10": 334.9095, "encoder_q-layer.11": 1003.9174, "encoder_q-layer.2": 1885.0256, "encoder_q-layer.3": 1891.4529, "encoder_q-layer.4": 1958.0845, "encoder_q-layer.5": 2022.3701, "encoder_q-layer.6": 1757.4958, "encoder_q-layer.7": 1122.3926, "encoder_q-layer.8": 678.3106, "encoder_q-layer.9": 336.4934, "epoch": 0.22, "inbatch_neg_score": 0.2326, "inbatch_pos_score": 0.8281, "learning_rate": 4.2833333333333335e-05, "loss": 3.8325, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.9002, "num_token_overlap": 14.5971, "num_token_query": 37.143, "num_token_union": 65.3138, "num_word_context": 201.9943, "num_word_doc": 49.8878, "num_word_query": 27.7945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2265.3625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2329, "query_norm": 1.3968, "queue_k_norm": 1.4099, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.143, "sent_len_1": 66.9002, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2163, "stdk": 0.0479, "stdq": 0.0437, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.8111, "doc_norm": 1.4112, "encoder_q-embeddings": 1794.9028, "encoder_q-layer.0": 1246.4353, "encoder_q-layer.1": 1276.4858, "encoder_q-layer.10": 333.6704, "encoder_q-layer.11": 891.2357, "encoder_q-layer.2": 1348.3318, "encoder_q-layer.3": 1483.6506, "encoder_q-layer.4": 1622.6066, "encoder_q-layer.5": 1690.7872, "encoder_q-layer.6": 1541.3988, "encoder_q-layer.7": 909.8748, "encoder_q-layer.8": 494.1842, "encoder_q-layer.9": 309.1054, "epoch": 0.22, "inbatch_neg_score": 0.2385, "inbatch_pos_score": 0.876, "learning_rate": 4.277777777777778e-05, "loss": 3.8111, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.6533, "num_token_overlap": 14.6391, "num_token_query": 37.4345, "num_token_union": 65.3096, "num_word_context": 201.7063, "num_word_doc": 49.7708, "num_word_query": 28.0505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1906.5023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2377, "query_norm": 1.4138, "queue_k_norm": 1.4092, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4345, "sent_len_1": 66.6533, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7575, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8232, "doc_norm": 1.4139, "encoder_q-embeddings": 4228.1245, "encoder_q-layer.0": 3301.7571, "encoder_q-layer.1": 3463.7217, "encoder_q-layer.10": 180.3843, "encoder_q-layer.11": 465.8897, "encoder_q-layer.2": 4303.5815, "encoder_q-layer.3": 4469.561, "encoder_q-layer.4": 3896.4011, "encoder_q-layer.5": 3048.4744, "encoder_q-layer.6": 2824.0894, "encoder_q-layer.7": 1790.7744, "encoder_q-layer.8": 590.9535, "encoder_q-layer.9": 192.5659, "epoch": 0.23, "inbatch_neg_score": 0.2346, "inbatch_pos_score": 0.8135, "learning_rate": 4.272222222222223e-05, "loss": 3.8232, "norm_diff": 0.0147, "norm_loss": 0.0, "num_token_doc": 66.9947, "num_token_overlap": 14.6038, "num_token_query": 37.3058, "num_token_union": 65.5308, "num_word_context": 202.6493, "num_word_doc": 50.0117, "num_word_query": 27.9466, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4635.2955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2344, "query_norm": 1.404, "queue_k_norm": 1.4131, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3058, "sent_len_1": 66.9947, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1337, "stdk": 0.0479, "stdq": 0.0439, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.8319, "doc_norm": 1.4103, "encoder_q-embeddings": 1779.1196, "encoder_q-layer.0": 1352.9371, "encoder_q-layer.1": 1219.7958, "encoder_q-layer.10": 258.3269, "encoder_q-layer.11": 558.8734, "encoder_q-layer.2": 1249.6216, "encoder_q-layer.3": 1219.6584, "encoder_q-layer.4": 1158.0269, "encoder_q-layer.5": 1028.2837, "encoder_q-layer.6": 995.6771, "encoder_q-layer.7": 683.8156, "encoder_q-layer.8": 513.3872, "encoder_q-layer.9": 243.8527, "epoch": 0.23, "inbatch_neg_score": 0.2383, "inbatch_pos_score": 0.8291, "learning_rate": 4.266666666666667e-05, "loss": 3.8319, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 66.9927, "num_token_overlap": 14.586, "num_token_query": 37.3752, "num_token_union": 65.5878, "num_word_context": 202.4399, "num_word_doc": 49.994, "num_word_query": 27.9982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1602.9252, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2369, "query_norm": 1.4256, "queue_k_norm": 1.4115, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3752, "sent_len_1": 66.9927, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.71, "stdk": 0.0476, "stdq": 0.0453, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.8219, "doc_norm": 1.4094, "encoder_q-embeddings": 175.7014, "encoder_q-layer.0": 124.1453, "encoder_q-layer.1": 133.4217, "encoder_q-layer.10": 165.0992, "encoder_q-layer.11": 435.3232, "encoder_q-layer.2": 138.5045, "encoder_q-layer.3": 147.8052, "encoder_q-layer.4": 153.2748, "encoder_q-layer.5": 160.3088, "encoder_q-layer.6": 175.066, "encoder_q-layer.7": 172.8475, "encoder_q-layer.8": 190.987, "encoder_q-layer.9": 161.6502, "epoch": 0.23, "inbatch_neg_score": 0.2406, "inbatch_pos_score": 0.8447, "learning_rate": 4.261111111111111e-05, "loss": 3.8219, "norm_diff": 0.019, "norm_loss": 0.0, "num_token_doc": 66.7973, "num_token_overlap": 14.6313, "num_token_query": 37.4099, "num_token_union": 65.3788, "num_word_context": 202.3576, "num_word_doc": 49.8685, "num_word_query": 28.0068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 296.955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2385, "query_norm": 1.4279, "queue_k_norm": 1.4111, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4099, "sent_len_1": 66.7973, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9275, "stdk": 0.0476, "stdq": 0.0451, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.8199, "doc_norm": 1.4126, "encoder_q-embeddings": 898.841, "encoder_q-layer.0": 623.1703, "encoder_q-layer.1": 729.0212, "encoder_q-layer.10": 188.0396, "encoder_q-layer.11": 457.5285, "encoder_q-layer.2": 834.8315, "encoder_q-layer.3": 841.7415, "encoder_q-layer.4": 834.1033, "encoder_q-layer.5": 735.3906, "encoder_q-layer.6": 626.0325, "encoder_q-layer.7": 440.1536, "encoder_q-layer.8": 331.9277, "encoder_q-layer.9": 174.1863, "epoch": 0.23, "inbatch_neg_score": 0.238, "inbatch_pos_score": 0.8174, "learning_rate": 4.255555555555556e-05, "loss": 3.8199, "norm_diff": 0.0251, "norm_loss": 0.0, "num_token_doc": 66.9686, "num_token_overlap": 14.6097, "num_token_query": 37.4628, "num_token_union": 65.5824, "num_word_context": 202.5961, "num_word_doc": 50.0112, "num_word_query": 28.044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 970.4497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2372, "query_norm": 1.4099, "queue_k_norm": 1.4113, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4628, "sent_len_1": 66.9686, "sent_len_max_0": 127.985, "sent_len_max_1": 189.3075, "stdk": 0.0478, "stdq": 0.0441, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.8089, "doc_norm": 1.4068, "encoder_q-embeddings": 170.8772, "encoder_q-layer.0": 113.8044, "encoder_q-layer.1": 119.442, "encoder_q-layer.10": 189.9926, "encoder_q-layer.11": 448.9593, "encoder_q-layer.2": 128.9977, "encoder_q-layer.3": 131.7236, "encoder_q-layer.4": 141.3791, "encoder_q-layer.5": 140.4896, "encoder_q-layer.6": 143.7944, "encoder_q-layer.7": 153.2979, "encoder_q-layer.8": 184.978, "encoder_q-layer.9": 159.5613, "epoch": 0.23, "inbatch_neg_score": 0.2445, "inbatch_pos_score": 0.8262, "learning_rate": 4.25e-05, "loss": 3.8089, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 66.7772, "num_token_overlap": 14.5993, "num_token_query": 37.3253, "num_token_union": 65.3548, "num_word_context": 202.4675, "num_word_doc": 49.8271, "num_word_query": 27.9386, "postclip_grad_norm": 1.0, "preclip_grad_norm": 293.5268, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.243, "query_norm": 1.3979, "queue_k_norm": 1.4107, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3253, "sent_len_1": 66.7772, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0813, "stdk": 0.0476, "stdq": 0.0439, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7929, "doc_norm": 1.4114, "encoder_q-embeddings": 277.7892, "encoder_q-layer.0": 192.5723, "encoder_q-layer.1": 219.0153, "encoder_q-layer.10": 173.2437, "encoder_q-layer.11": 464.9223, "encoder_q-layer.2": 233.8012, "encoder_q-layer.3": 243.329, "encoder_q-layer.4": 250.6733, "encoder_q-layer.5": 253.2218, "encoder_q-layer.6": 247.007, "encoder_q-layer.7": 238.4305, "encoder_q-layer.8": 213.2504, "encoder_q-layer.9": 160.1523, "epoch": 0.23, "inbatch_neg_score": 0.2325, "inbatch_pos_score": 0.8374, "learning_rate": 4.2444444444444445e-05, "loss": 3.7929, "norm_diff": 0.0222, "norm_loss": 0.0, "num_token_doc": 66.7464, "num_token_overlap": 14.596, "num_token_query": 37.2653, "num_token_union": 65.3351, "num_word_context": 201.9576, "num_word_doc": 49.7817, "num_word_query": 27.8919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 385.8668, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2323, "query_norm": 1.3892, "queue_k_norm": 1.4111, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2653, "sent_len_1": 66.7464, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9425, "stdk": 0.0477, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.8077, "doc_norm": 1.4091, "encoder_q-embeddings": 352.7404, "encoder_q-layer.0": 231.1172, "encoder_q-layer.1": 239.4347, "encoder_q-layer.10": 188.0812, "encoder_q-layer.11": 463.4224, "encoder_q-layer.2": 287.1093, "encoder_q-layer.3": 292.5963, "encoder_q-layer.4": 319.2221, "encoder_q-layer.5": 301.9844, "encoder_q-layer.6": 279.682, "encoder_q-layer.7": 263.3499, "encoder_q-layer.8": 225.6929, "encoder_q-layer.9": 162.9128, "epoch": 0.23, "inbatch_neg_score": 0.2348, "inbatch_pos_score": 0.8193, "learning_rate": 4.238888888888889e-05, "loss": 3.8077, "norm_diff": 0.0335, "norm_loss": 0.0, "num_token_doc": 66.8385, "num_token_overlap": 14.6189, "num_token_query": 37.4877, "num_token_union": 65.4731, "num_word_context": 202.534, "num_word_doc": 49.8779, "num_word_query": 28.0875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 434.9184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2354, "query_norm": 1.3756, "queue_k_norm": 1.4122, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4877, "sent_len_1": 66.8385, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.59, "stdk": 0.0476, "stdq": 0.0438, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7968, "doc_norm": 1.4146, "encoder_q-embeddings": 520.4409, "encoder_q-layer.0": 341.674, "encoder_q-layer.1": 366.3794, "encoder_q-layer.10": 186.4216, "encoder_q-layer.11": 512.7702, "encoder_q-layer.2": 435.537, "encoder_q-layer.3": 499.8626, "encoder_q-layer.4": 545.3441, "encoder_q-layer.5": 581.6881, "encoder_q-layer.6": 621.4182, "encoder_q-layer.7": 611.3508, "encoder_q-layer.8": 397.2748, "encoder_q-layer.9": 186.71, "epoch": 0.23, "inbatch_neg_score": 0.2169, "inbatch_pos_score": 0.8042, "learning_rate": 4.233333333333334e-05, "loss": 3.7968, "norm_diff": 0.0413, "norm_loss": 0.0, "num_token_doc": 66.8055, "num_token_overlap": 14.6222, "num_token_query": 37.3039, "num_token_union": 65.3457, "num_word_context": 202.5354, "num_word_doc": 49.8508, "num_word_query": 27.9509, "postclip_grad_norm": 1.0, "preclip_grad_norm": 692.1019, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.217, "query_norm": 1.3733, "queue_k_norm": 1.4109, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3039, "sent_len_1": 66.8055, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.1238, "stdk": 0.0478, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8012, "doc_norm": 1.4072, "encoder_q-embeddings": 716.9529, "encoder_q-layer.0": 509.0998, "encoder_q-layer.1": 580.6102, "encoder_q-layer.10": 163.8951, "encoder_q-layer.11": 474.4848, "encoder_q-layer.2": 672.2731, "encoder_q-layer.3": 729.5801, "encoder_q-layer.4": 852.2047, "encoder_q-layer.5": 614.442, "encoder_q-layer.6": 380.6175, "encoder_q-layer.7": 228.0545, "encoder_q-layer.8": 213.5842, "encoder_q-layer.9": 154.5186, "epoch": 0.23, "inbatch_neg_score": 0.213, "inbatch_pos_score": 0.7856, "learning_rate": 4.227777777777778e-05, "loss": 3.8012, "norm_diff": 0.0365, "norm_loss": 0.0, "num_token_doc": 66.773, "num_token_overlap": 14.5588, "num_token_query": 37.1943, "num_token_union": 65.2999, "num_word_context": 201.8703, "num_word_doc": 49.7673, "num_word_query": 27.8378, "postclip_grad_norm": 1.0, "preclip_grad_norm": 805.8669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2124, "query_norm": 1.3708, "queue_k_norm": 1.4085, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1943, "sent_len_1": 66.773, "sent_len_max_0": 128.0, "sent_len_max_1": 190.645, "stdk": 0.0476, "stdq": 0.0438, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7856, "doc_norm": 1.4091, "encoder_q-embeddings": 223.8219, "encoder_q-layer.0": 152.1151, "encoder_q-layer.1": 177.1055, "encoder_q-layer.10": 209.0486, "encoder_q-layer.11": 482.1901, "encoder_q-layer.2": 204.2568, "encoder_q-layer.3": 207.5367, "encoder_q-layer.4": 211.4503, "encoder_q-layer.5": 202.2258, "encoder_q-layer.6": 195.5707, "encoder_q-layer.7": 161.2072, "encoder_q-layer.8": 177.9427, "encoder_q-layer.9": 165.3232, "epoch": 0.23, "inbatch_neg_score": 0.2102, "inbatch_pos_score": 0.8164, "learning_rate": 4.222222222222222e-05, "loss": 3.7856, "norm_diff": 0.0208, "norm_loss": 0.0, "num_token_doc": 66.7234, "num_token_overlap": 14.6139, "num_token_query": 37.4347, "num_token_union": 65.3948, "num_word_context": 202.2571, "num_word_doc": 49.7983, "num_word_query": 28.0471, "postclip_grad_norm": 1.0, "preclip_grad_norm": 342.5718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2086, "query_norm": 1.3882, "queue_k_norm": 1.407, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4347, "sent_len_1": 66.7234, "sent_len_max_0": 127.99, "sent_len_max_1": 191.1712, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.807, "doc_norm": 1.3958, "encoder_q-embeddings": 1790.8297, "encoder_q-layer.0": 1318.8539, "encoder_q-layer.1": 1308.8091, "encoder_q-layer.10": 181.9849, "encoder_q-layer.11": 482.0995, "encoder_q-layer.2": 1633.1847, "encoder_q-layer.3": 1653.5411, "encoder_q-layer.4": 1916.0969, "encoder_q-layer.5": 1277.7812, "encoder_q-layer.6": 1075.675, "encoder_q-layer.7": 573.9641, "encoder_q-layer.8": 333.1822, "encoder_q-layer.9": 179.9055, "epoch": 0.24, "inbatch_neg_score": 0.2136, "inbatch_pos_score": 0.7988, "learning_rate": 4.216666666666667e-05, "loss": 3.807, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.6974, "num_token_overlap": 14.5513, "num_token_query": 37.2411, "num_token_union": 65.2992, "num_word_context": 202.4429, "num_word_doc": 49.7604, "num_word_query": 27.8642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1873.6995, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2133, "query_norm": 1.4034, "queue_k_norm": 1.4067, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2411, "sent_len_1": 66.6974, "sent_len_max_0": 128.0, "sent_len_max_1": 188.86, "stdk": 0.0472, "stdq": 0.0444, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7995, "doc_norm": 1.3991, "encoder_q-embeddings": 4975.0181, "encoder_q-layer.0": 3969.7969, "encoder_q-layer.1": 4853.269, "encoder_q-layer.10": 207.7194, "encoder_q-layer.11": 539.7483, "encoder_q-layer.2": 5751.4712, "encoder_q-layer.3": 6249.0767, "encoder_q-layer.4": 5138.8047, "encoder_q-layer.5": 5161.8682, "encoder_q-layer.6": 6639.8369, "encoder_q-layer.7": 4430.3242, "encoder_q-layer.8": 2862.5369, "encoder_q-layer.9": 628.3616, "epoch": 0.24, "inbatch_neg_score": 0.2122, "inbatch_pos_score": 0.7939, "learning_rate": 4.211111111111111e-05, "loss": 3.7995, "norm_diff": 0.0165, "norm_loss": 0.0, "num_token_doc": 66.7788, "num_token_overlap": 14.6852, "num_token_query": 37.6488, "num_token_union": 65.5246, "num_word_context": 202.8306, "num_word_doc": 49.8766, "num_word_query": 28.2033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6843.8735, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2115, "query_norm": 1.4118, "queue_k_norm": 1.4044, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.6488, "sent_len_1": 66.7788, "sent_len_max_0": 127.99, "sent_len_max_1": 188.5375, "stdk": 0.0474, "stdq": 0.0443, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.8141, "doc_norm": 1.4059, "encoder_q-embeddings": 854.5142, "encoder_q-layer.0": 602.3669, "encoder_q-layer.1": 660.6891, "encoder_q-layer.10": 186.7652, "encoder_q-layer.11": 521.6428, "encoder_q-layer.2": 735.6464, "encoder_q-layer.3": 762.9183, "encoder_q-layer.4": 800.0583, "encoder_q-layer.5": 585.2684, "encoder_q-layer.6": 317.3256, "encoder_q-layer.7": 217.3697, "encoder_q-layer.8": 196.1744, "encoder_q-layer.9": 171.9843, "epoch": 0.24, "inbatch_neg_score": 0.2114, "inbatch_pos_score": 0.7803, "learning_rate": 4.205555555555556e-05, "loss": 3.8141, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.8084, "num_token_overlap": 14.5394, "num_token_query": 37.286, "num_token_union": 65.4192, "num_word_context": 202.4146, "num_word_doc": 49.8568, "num_word_query": 27.9242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 880.6568, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2114, "query_norm": 1.3679, "queue_k_norm": 1.4049, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.286, "sent_len_1": 66.8084, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4238, "stdk": 0.0476, "stdq": 0.043, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8022, "doc_norm": 1.4026, "encoder_q-embeddings": 793.7265, "encoder_q-layer.0": 571.1835, "encoder_q-layer.1": 642.1494, "encoder_q-layer.10": 177.9031, "encoder_q-layer.11": 484.7583, "encoder_q-layer.2": 811.7591, "encoder_q-layer.3": 760.0611, "encoder_q-layer.4": 690.6271, "encoder_q-layer.5": 607.651, "encoder_q-layer.6": 544.2165, "encoder_q-layer.7": 359.0039, "encoder_q-layer.8": 258.0452, "encoder_q-layer.9": 170.9832, "epoch": 0.24, "inbatch_neg_score": 0.209, "inbatch_pos_score": 0.7949, "learning_rate": 4.2e-05, "loss": 3.8022, "norm_diff": 0.0205, "norm_loss": 0.0, "num_token_doc": 66.7382, "num_token_overlap": 14.5185, "num_token_query": 37.1132, "num_token_union": 65.3002, "num_word_context": 202.0011, "num_word_doc": 49.783, "num_word_query": 27.7526, "postclip_grad_norm": 1.0, "preclip_grad_norm": 880.1677, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2085, "query_norm": 1.3891, "queue_k_norm": 1.4029, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1132, "sent_len_1": 66.7382, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1662, "stdk": 0.0476, "stdq": 0.0439, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8025, "doc_norm": 1.3932, "encoder_q-embeddings": 656.3192, "encoder_q-layer.0": 485.5499, "encoder_q-layer.1": 562.6494, "encoder_q-layer.10": 175.6682, "encoder_q-layer.11": 500.7885, "encoder_q-layer.2": 632.0475, "encoder_q-layer.3": 646.5506, "encoder_q-layer.4": 706.018, "encoder_q-layer.5": 625.3713, "encoder_q-layer.6": 499.8733, "encoder_q-layer.7": 290.684, "encoder_q-layer.8": 210.4766, "encoder_q-layer.9": 173.979, "epoch": 0.24, "inbatch_neg_score": 0.2117, "inbatch_pos_score": 0.7959, "learning_rate": 4.194444444444445e-05, "loss": 3.8025, "norm_diff": 0.0095, "norm_loss": 0.0, "num_token_doc": 66.8949, "num_token_overlap": 14.6318, "num_token_query": 37.2479, "num_token_union": 65.3772, "num_word_context": 202.054, "num_word_doc": 49.9171, "num_word_query": 27.8841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 774.326, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.21, "query_norm": 1.3965, "queue_k_norm": 1.4034, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2479, "sent_len_1": 66.8949, "sent_len_max_0": 128.0, "sent_len_max_1": 188.79, "stdk": 0.0472, "stdq": 0.0442, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.7966, "doc_norm": 1.4015, "encoder_q-embeddings": 462.9471, "encoder_q-layer.0": 315.2393, "encoder_q-layer.1": 370.4301, "encoder_q-layer.10": 178.1632, "encoder_q-layer.11": 460.2698, "encoder_q-layer.2": 434.9694, "encoder_q-layer.3": 465.4546, "encoder_q-layer.4": 417.9887, "encoder_q-layer.5": 347.989, "encoder_q-layer.6": 351.2493, "encoder_q-layer.7": 289.1091, "encoder_q-layer.8": 232.4207, "encoder_q-layer.9": 158.1851, "epoch": 0.24, "inbatch_neg_score": 0.1986, "inbatch_pos_score": 0.7671, "learning_rate": 4.188888888888889e-05, "loss": 3.7966, "norm_diff": 0.023, "norm_loss": 0.0, "num_token_doc": 66.8194, "num_token_overlap": 14.6234, "num_token_query": 37.3807, "num_token_union": 65.3627, "num_word_context": 202.2878, "num_word_doc": 49.8033, "num_word_query": 27.9758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 552.1217, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1991, "query_norm": 1.3869, "queue_k_norm": 1.4026, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3807, "sent_len_1": 66.8194, "sent_len_max_0": 128.0, "sent_len_max_1": 190.705, "stdk": 0.0475, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.7839, "doc_norm": 1.402, "encoder_q-embeddings": 834.6636, "encoder_q-layer.0": 591.0346, "encoder_q-layer.1": 728.3869, "encoder_q-layer.10": 203.239, "encoder_q-layer.11": 508.4538, "encoder_q-layer.2": 895.8961, "encoder_q-layer.3": 873.5789, "encoder_q-layer.4": 892.5411, "encoder_q-layer.5": 764.897, "encoder_q-layer.6": 643.8934, "encoder_q-layer.7": 454.9479, "encoder_q-layer.8": 288.2585, "encoder_q-layer.9": 181.6469, "epoch": 0.24, "inbatch_neg_score": 0.2072, "inbatch_pos_score": 0.8462, "learning_rate": 4.183333333333334e-05, "loss": 3.7839, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.7335, "num_token_overlap": 14.6123, "num_token_query": 37.4138, "num_token_union": 65.408, "num_word_context": 202.3488, "num_word_doc": 49.8154, "num_word_query": 28.0374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 978.2472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2064, "query_norm": 1.4415, "queue_k_norm": 1.4021, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4138, "sent_len_1": 66.7335, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5613, "stdk": 0.0476, "stdq": 0.046, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.7956, "doc_norm": 1.4014, "encoder_q-embeddings": 828.1437, "encoder_q-layer.0": 635.2032, "encoder_q-layer.1": 816.17, "encoder_q-layer.10": 185.7534, "encoder_q-layer.11": 481.101, "encoder_q-layer.2": 726.9014, "encoder_q-layer.3": 647.3659, "encoder_q-layer.4": 635.7042, "encoder_q-layer.5": 600.2549, "encoder_q-layer.6": 537.6353, "encoder_q-layer.7": 399.9737, "encoder_q-layer.8": 247.4667, "encoder_q-layer.9": 165.4508, "epoch": 0.24, "inbatch_neg_score": 0.1984, "inbatch_pos_score": 0.7891, "learning_rate": 4.177777777777778e-05, "loss": 3.7956, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.8609, "num_token_overlap": 14.6007, "num_token_query": 37.2503, "num_token_union": 65.3658, "num_word_context": 202.1727, "num_word_doc": 49.8457, "num_word_query": 27.8785, "postclip_grad_norm": 1.0, "preclip_grad_norm": 878.1471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1985, "query_norm": 1.3998, "queue_k_norm": 1.4007, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2503, "sent_len_1": 66.8609, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.7163, "stdk": 0.0476, "stdq": 0.0442, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8093, "doc_norm": 1.3979, "encoder_q-embeddings": 1861.313, "encoder_q-layer.0": 1288.7942, "encoder_q-layer.1": 1284.1606, "encoder_q-layer.10": 194.4935, "encoder_q-layer.11": 552.2143, "encoder_q-layer.2": 1568.1146, "encoder_q-layer.3": 1411.2744, "encoder_q-layer.4": 1289.1949, "encoder_q-layer.5": 1179.8304, "encoder_q-layer.6": 1175.9124, "encoder_q-layer.7": 923.4168, "encoder_q-layer.8": 507.0618, "encoder_q-layer.9": 197.7837, "epoch": 0.24, "inbatch_neg_score": 0.2005, "inbatch_pos_score": 0.7861, "learning_rate": 4.172222222222222e-05, "loss": 3.8093, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.8532, "num_token_overlap": 14.5052, "num_token_query": 37.1735, "num_token_union": 65.4456, "num_word_context": 202.6035, "num_word_doc": 49.8643, "num_word_query": 27.8445, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1792.2231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1989, "query_norm": 1.396, "queue_k_norm": 1.3988, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1735, "sent_len_1": 66.8532, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9487, "stdk": 0.0475, "stdq": 0.0443, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7851, "doc_norm": 1.3914, "encoder_q-embeddings": 440.9758, "encoder_q-layer.0": 325.9167, "encoder_q-layer.1": 376.8126, "encoder_q-layer.10": 184.7399, "encoder_q-layer.11": 492.2755, "encoder_q-layer.2": 373.7747, "encoder_q-layer.3": 278.3315, "encoder_q-layer.4": 244.4728, "encoder_q-layer.5": 213.2629, "encoder_q-layer.6": 229.7868, "encoder_q-layer.7": 208.9124, "encoder_q-layer.8": 200.9166, "encoder_q-layer.9": 164.5529, "epoch": 0.24, "inbatch_neg_score": 0.2023, "inbatch_pos_score": 0.7715, "learning_rate": 4.166666666666667e-05, "loss": 3.7851, "norm_diff": 0.0148, "norm_loss": 0.0, "num_token_doc": 66.8282, "num_token_overlap": 14.6681, "num_token_query": 37.3792, "num_token_union": 65.3407, "num_word_context": 202.3447, "num_word_doc": 49.8588, "num_word_query": 27.9712, "postclip_grad_norm": 1.0, "preclip_grad_norm": 474.4031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.201, "query_norm": 1.3776, "queue_k_norm": 1.3992, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3792, "sent_len_1": 66.8282, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.4563, "stdk": 0.0473, "stdq": 0.0439, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7875, "doc_norm": 1.4023, "encoder_q-embeddings": 445.5276, "encoder_q-layer.0": 323.0808, "encoder_q-layer.1": 338.3935, "encoder_q-layer.10": 315.6598, "encoder_q-layer.11": 926.993, "encoder_q-layer.2": 368.4425, "encoder_q-layer.3": 382.888, "encoder_q-layer.4": 364.3746, "encoder_q-layer.5": 295.8073, "encoder_q-layer.6": 309.765, "encoder_q-layer.7": 316.4372, "encoder_q-layer.8": 331.1194, "encoder_q-layer.9": 293.6845, "epoch": 0.25, "inbatch_neg_score": 0.2065, "inbatch_pos_score": 0.8057, "learning_rate": 4.1611111111111114e-05, "loss": 3.7875, "norm_diff": 0.0257, "norm_loss": 0.0, "num_token_doc": 66.8889, "num_token_overlap": 14.6517, "num_token_query": 37.3908, "num_token_union": 65.4457, "num_word_context": 202.4722, "num_word_doc": 49.9084, "num_word_query": 27.9417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 661.093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.205, "query_norm": 1.3766, "queue_k_norm": 1.3994, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3908, "sent_len_1": 66.8889, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0475, "stdk": 0.0477, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.7788, "doc_norm": 1.3952, "encoder_q-embeddings": 1990.484, "encoder_q-layer.0": 1387.8011, "encoder_q-layer.1": 1559.1545, "encoder_q-layer.10": 338.7137, "encoder_q-layer.11": 903.2589, "encoder_q-layer.2": 1840.1483, "encoder_q-layer.3": 1989.9392, "encoder_q-layer.4": 2225.1504, "encoder_q-layer.5": 1580.9939, "encoder_q-layer.6": 1194.1813, "encoder_q-layer.7": 767.9145, "encoder_q-layer.8": 446.5524, "encoder_q-layer.9": 316.7332, "epoch": 0.25, "inbatch_neg_score": 0.2197, "inbatch_pos_score": 0.8242, "learning_rate": 4.155555555555556e-05, "loss": 3.7788, "norm_diff": 0.0178, "norm_loss": 0.0, "num_token_doc": 66.9137, "num_token_overlap": 14.5917, "num_token_query": 37.3866, "num_token_union": 65.5136, "num_word_context": 202.4123, "num_word_doc": 49.921, "num_word_query": 27.9989, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2153.1111, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2189, "query_norm": 1.413, "queue_k_norm": 1.3968, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3866, "sent_len_1": 66.9137, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.2275, "stdk": 0.0474, "stdq": 0.0443, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7884, "doc_norm": 1.3965, "encoder_q-embeddings": 10077.0312, "encoder_q-layer.0": 7811.5586, "encoder_q-layer.1": 7214.4014, "encoder_q-layer.10": 336.453, "encoder_q-layer.11": 966.4173, "encoder_q-layer.2": 7821.5532, "encoder_q-layer.3": 8341.373, "encoder_q-layer.4": 9361.8848, "encoder_q-layer.5": 8996.6436, "encoder_q-layer.6": 4712.0977, "encoder_q-layer.7": 1838.0118, "encoder_q-layer.8": 798.8232, "encoder_q-layer.9": 370.3025, "epoch": 0.25, "inbatch_neg_score": 0.2233, "inbatch_pos_score": 0.8022, "learning_rate": 4.15e-05, "loss": 3.7884, "norm_diff": 0.019, "norm_loss": 0.0, "num_token_doc": 66.8816, "num_token_overlap": 14.5568, "num_token_query": 37.3033, "num_token_union": 65.4615, "num_word_context": 202.5123, "num_word_doc": 49.9172, "num_word_query": 27.9055, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10031.6212, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2222, "query_norm": 1.3974, "queue_k_norm": 1.3992, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3033, "sent_len_1": 66.8816, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5037, "stdk": 0.0475, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7976, "doc_norm": 1.3917, "encoder_q-embeddings": 565.603, "encoder_q-layer.0": 408.4198, "encoder_q-layer.1": 449.4654, "encoder_q-layer.10": 352.3084, "encoder_q-layer.11": 887.9566, "encoder_q-layer.2": 491.5544, "encoder_q-layer.3": 529.1467, "encoder_q-layer.4": 533.5861, "encoder_q-layer.5": 495.5703, "encoder_q-layer.6": 460.5767, "encoder_q-layer.7": 458.6958, "encoder_q-layer.8": 418.5854, "encoder_q-layer.9": 308.8611, "epoch": 0.25, "inbatch_neg_score": 0.2301, "inbatch_pos_score": 0.8193, "learning_rate": 4.144444444444445e-05, "loss": 3.7976, "norm_diff": 0.0202, "norm_loss": 0.0, "num_token_doc": 66.9336, "num_token_overlap": 14.6122, "num_token_query": 37.3898, "num_token_union": 65.4802, "num_word_context": 202.5418, "num_word_doc": 49.9262, "num_word_query": 27.9759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 772.6113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2288, "query_norm": 1.4092, "queue_k_norm": 1.3989, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3898, "sent_len_1": 66.9336, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.2962, "stdk": 0.0473, "stdq": 0.0438, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7677, "doc_norm": 1.4024, "encoder_q-embeddings": 602.8524, "encoder_q-layer.0": 427.8875, "encoder_q-layer.1": 478.0962, "encoder_q-layer.10": 313.683, "encoder_q-layer.11": 879.6494, "encoder_q-layer.2": 485.5404, "encoder_q-layer.3": 571.9654, "encoder_q-layer.4": 534.3129, "encoder_q-layer.5": 510.2124, "encoder_q-layer.6": 392.9542, "encoder_q-layer.7": 376.9362, "encoder_q-layer.8": 363.1094, "encoder_q-layer.9": 302.917, "epoch": 0.25, "inbatch_neg_score": 0.2327, "inbatch_pos_score": 0.8379, "learning_rate": 4.138888888888889e-05, "loss": 3.7677, "norm_diff": 0.0173, "norm_loss": 0.0, "num_token_doc": 66.8425, "num_token_overlap": 14.6233, "num_token_query": 37.2636, "num_token_union": 65.3616, "num_word_context": 202.0198, "num_word_doc": 49.858, "num_word_query": 27.8885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 789.2353, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2334, "query_norm": 1.4152, "queue_k_norm": 1.3967, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2636, "sent_len_1": 66.8425, "sent_len_max_0": 128.0, "sent_len_max_1": 189.05, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.7949, "doc_norm": 1.4027, "encoder_q-embeddings": 846.4724, "encoder_q-layer.0": 662.2455, "encoder_q-layer.1": 735.2465, "encoder_q-layer.10": 385.1265, "encoder_q-layer.11": 920.1035, "encoder_q-layer.2": 827.34, "encoder_q-layer.3": 840.0137, "encoder_q-layer.4": 804.9598, "encoder_q-layer.5": 619.3851, "encoder_q-layer.6": 551.1953, "encoder_q-layer.7": 490.541, "encoder_q-layer.8": 399.0687, "encoder_q-layer.9": 324.2996, "epoch": 0.25, "inbatch_neg_score": 0.2416, "inbatch_pos_score": 0.8545, "learning_rate": 4.133333333333333e-05, "loss": 3.7949, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.6092, "num_token_overlap": 14.5491, "num_token_query": 37.1377, "num_token_union": 65.2152, "num_word_context": 202.0941, "num_word_doc": 49.7369, "num_word_query": 27.8272, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1023.5571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2419, "query_norm": 1.4378, "queue_k_norm": 1.3979, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1377, "sent_len_1": 66.6092, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6438, "stdk": 0.0477, "stdq": 0.0449, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.781, "doc_norm": 1.3989, "encoder_q-embeddings": 383.9446, "encoder_q-layer.0": 277.9706, "encoder_q-layer.1": 307.1631, "encoder_q-layer.10": 193.6171, "encoder_q-layer.11": 457.5887, "encoder_q-layer.2": 375.8296, "encoder_q-layer.3": 434.3334, "encoder_q-layer.4": 404.9627, "encoder_q-layer.5": 322.5393, "encoder_q-layer.6": 271.9153, "encoder_q-layer.7": 276.5935, "encoder_q-layer.8": 275.9697, "encoder_q-layer.9": 200.1061, "epoch": 0.25, "inbatch_neg_score": 0.2504, "inbatch_pos_score": 0.8452, "learning_rate": 4.127777777777778e-05, "loss": 3.781, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.6257, "num_token_overlap": 14.6443, "num_token_query": 37.4759, "num_token_union": 65.3164, "num_word_context": 202.1275, "num_word_doc": 49.7143, "num_word_query": 28.05, "postclip_grad_norm": 1.0, "preclip_grad_norm": 504.1247, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2502, "query_norm": 1.4045, "queue_k_norm": 1.3983, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4759, "sent_len_1": 66.6257, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.4913, "stdk": 0.0475, "stdq": 0.0439, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7893, "doc_norm": 1.4028, "encoder_q-embeddings": 736.689, "encoder_q-layer.0": 553.7579, "encoder_q-layer.1": 574.2682, "encoder_q-layer.10": 176.7916, "encoder_q-layer.11": 503.9223, "encoder_q-layer.2": 538.0725, "encoder_q-layer.3": 508.1497, "encoder_q-layer.4": 498.6472, "encoder_q-layer.5": 440.1898, "encoder_q-layer.6": 399.3075, "encoder_q-layer.7": 337.0915, "encoder_q-layer.8": 277.5264, "encoder_q-layer.9": 180.6828, "epoch": 0.25, "inbatch_neg_score": 0.2392, "inbatch_pos_score": 0.8223, "learning_rate": 4.1222222222222224e-05, "loss": 3.7893, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.7815, "num_token_overlap": 14.5945, "num_token_query": 37.327, "num_token_union": 65.3432, "num_word_context": 202.4894, "num_word_doc": 49.8333, "num_word_query": 27.9367, "postclip_grad_norm": 1.0, "preclip_grad_norm": 722.197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2379, "query_norm": 1.4161, "queue_k_norm": 1.3998, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.327, "sent_len_1": 66.7815, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4812, "stdk": 0.0476, "stdq": 0.0449, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7594, "doc_norm": 1.3936, "encoder_q-embeddings": 1804.7078, "encoder_q-layer.0": 1304.1561, "encoder_q-layer.1": 1387.2018, "encoder_q-layer.10": 170.8734, "encoder_q-layer.11": 443.859, "encoder_q-layer.2": 1459.2334, "encoder_q-layer.3": 1543.0331, "encoder_q-layer.4": 1625.2125, "encoder_q-layer.5": 1384.3436, "encoder_q-layer.6": 1371.9711, "encoder_q-layer.7": 1368.8862, "encoder_q-layer.8": 1185.1852, "encoder_q-layer.9": 380.7771, "epoch": 0.25, "inbatch_neg_score": 0.2525, "inbatch_pos_score": 0.8618, "learning_rate": 4.116666666666667e-05, "loss": 3.7594, "norm_diff": 0.0404, "norm_loss": 0.0, "num_token_doc": 66.695, "num_token_overlap": 14.6202, "num_token_query": 37.5154, "num_token_union": 65.4299, "num_word_context": 202.2094, "num_word_doc": 49.7635, "num_word_query": 28.0859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1952.6356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2517, "query_norm": 1.4339, "queue_k_norm": 1.3996, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5154, "sent_len_1": 66.695, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.05, "stdk": 0.0472, "stdq": 0.0449, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.7834, "doc_norm": 1.4055, "encoder_q-embeddings": 666.9628, "encoder_q-layer.0": 493.813, "encoder_q-layer.1": 567.9277, "encoder_q-layer.10": 169.4871, "encoder_q-layer.11": 467.2582, "encoder_q-layer.2": 601.9444, "encoder_q-layer.3": 507.7478, "encoder_q-layer.4": 546.6137, "encoder_q-layer.5": 508.8934, "encoder_q-layer.6": 492.1304, "encoder_q-layer.7": 362.5933, "encoder_q-layer.8": 265.6126, "encoder_q-layer.9": 170.7378, "epoch": 0.25, "inbatch_neg_score": 0.2485, "inbatch_pos_score": 0.8657, "learning_rate": 4.111111111111111e-05, "loss": 3.7834, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.7636, "num_token_overlap": 14.5976, "num_token_query": 37.3263, "num_token_union": 65.3424, "num_word_context": 202.5537, "num_word_doc": 49.8124, "num_word_query": 27.9434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 724.0028, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.248, "query_norm": 1.4172, "queue_k_norm": 1.4002, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3263, "sent_len_1": 66.7636, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1262, "stdk": 0.0476, "stdq": 0.0445, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.785, "doc_norm": 1.3984, "encoder_q-embeddings": 2544.1101, "encoder_q-layer.0": 1659.1614, "encoder_q-layer.1": 1987.0925, "encoder_q-layer.10": 177.1936, "encoder_q-layer.11": 465.9974, "encoder_q-layer.2": 2305.781, "encoder_q-layer.3": 2246.9634, "encoder_q-layer.4": 2046.5907, "encoder_q-layer.5": 1805.0455, "encoder_q-layer.6": 1479.6667, "encoder_q-layer.7": 1027.5662, "encoder_q-layer.8": 561.0496, "encoder_q-layer.9": 204.4714, "epoch": 0.25, "inbatch_neg_score": 0.2623, "inbatch_pos_score": 0.8379, "learning_rate": 4.105555555555556e-05, "loss": 3.785, "norm_diff": 0.0215, "norm_loss": 0.0, "num_token_doc": 66.6242, "num_token_overlap": 14.6143, "num_token_query": 37.4042, "num_token_union": 65.3312, "num_word_context": 202.1867, "num_word_doc": 49.6855, "num_word_query": 28.0182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2558.4815, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2634, "query_norm": 1.3979, "queue_k_norm": 1.4001, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4042, "sent_len_1": 66.6242, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.1987, "stdk": 0.0473, "stdq": 0.0432, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7524, "doc_norm": 1.4001, "encoder_q-embeddings": 2667.7249, "encoder_q-layer.0": 1968.5203, "encoder_q-layer.1": 2204.9888, "encoder_q-layer.10": 166.9794, "encoder_q-layer.11": 497.3602, "encoder_q-layer.2": 2560.886, "encoder_q-layer.3": 2477.6104, "encoder_q-layer.4": 2668.8848, "encoder_q-layer.5": 2695.2063, "encoder_q-layer.6": 1811.1277, "encoder_q-layer.7": 1146.5906, "encoder_q-layer.8": 572.558, "encoder_q-layer.9": 206.3262, "epoch": 0.26, "inbatch_neg_score": 0.2479, "inbatch_pos_score": 0.8403, "learning_rate": 4.1e-05, "loss": 3.7524, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.6906, "num_token_overlap": 14.6072, "num_token_query": 37.4068, "num_token_union": 65.3184, "num_word_context": 202.1352, "num_word_doc": 49.7508, "num_word_query": 27.9895, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2892.73, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2461, "query_norm": 1.409, "queue_k_norm": 1.4017, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4068, "sent_len_1": 66.6906, "sent_len_max_0": 128.0, "sent_len_max_1": 190.76, "stdk": 0.0473, "stdq": 0.0439, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7526, "doc_norm": 1.3995, "encoder_q-embeddings": 515.9867, "encoder_q-layer.0": 377.1602, "encoder_q-layer.1": 458.267, "encoder_q-layer.10": 162.3448, "encoder_q-layer.11": 439.9185, "encoder_q-layer.2": 516.0764, "encoder_q-layer.3": 584.8654, "encoder_q-layer.4": 604.861, "encoder_q-layer.5": 516.5463, "encoder_q-layer.6": 510.9483, "encoder_q-layer.7": 526.3119, "encoder_q-layer.8": 351.152, "encoder_q-layer.9": 161.8898, "epoch": 0.26, "inbatch_neg_score": 0.2383, "inbatch_pos_score": 0.8218, "learning_rate": 4.094444444444445e-05, "loss": 3.7526, "norm_diff": 0.0172, "norm_loss": 0.0, "num_token_doc": 67.1952, "num_token_overlap": 14.6962, "num_token_query": 37.5785, "num_token_union": 65.6608, "num_word_context": 202.7373, "num_word_doc": 50.1459, "num_word_query": 28.144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 688.1178, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2383, "query_norm": 1.4059, "queue_k_norm": 1.4024, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5785, "sent_len_1": 67.1952, "sent_len_max_0": 127.995, "sent_len_max_1": 189.6488, "stdk": 0.0473, "stdq": 0.044, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7774, "doc_norm": 1.3989, "encoder_q-embeddings": 465.6118, "encoder_q-layer.0": 313.4625, "encoder_q-layer.1": 362.7077, "encoder_q-layer.10": 154.3303, "encoder_q-layer.11": 444.0227, "encoder_q-layer.2": 400.8094, "encoder_q-layer.3": 433.8179, "encoder_q-layer.4": 417.2197, "encoder_q-layer.5": 426.0805, "encoder_q-layer.6": 402.1413, "encoder_q-layer.7": 413.0894, "encoder_q-layer.8": 348.0771, "encoder_q-layer.9": 155.8637, "epoch": 0.26, "inbatch_neg_score": 0.2372, "inbatch_pos_score": 0.8398, "learning_rate": 4.088888888888889e-05, "loss": 3.7774, "norm_diff": 0.0103, "norm_loss": 0.0, "num_token_doc": 66.7781, "num_token_overlap": 14.5374, "num_token_query": 37.1189, "num_token_union": 65.2869, "num_word_context": 202.3544, "num_word_doc": 49.8714, "num_word_query": 27.7877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 574.4246, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2362, "query_norm": 1.4019, "queue_k_norm": 1.4026, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1189, "sent_len_1": 66.7781, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4263, "stdk": 0.0473, "stdq": 0.0441, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.77, "doc_norm": 1.402, "encoder_q-embeddings": 671.1723, "encoder_q-layer.0": 508.4046, "encoder_q-layer.1": 583.7458, "encoder_q-layer.10": 179.2761, "encoder_q-layer.11": 466.6311, "encoder_q-layer.2": 412.2936, "encoder_q-layer.3": 331.2442, "encoder_q-layer.4": 325.3304, "encoder_q-layer.5": 295.8272, "encoder_q-layer.6": 293.5152, "encoder_q-layer.7": 262.0572, "encoder_q-layer.8": 235.5838, "encoder_q-layer.9": 156.7551, "epoch": 0.26, "inbatch_neg_score": 0.2329, "inbatch_pos_score": 0.8457, "learning_rate": 4.0833333333333334e-05, "loss": 3.77, "norm_diff": 0.019, "norm_loss": 0.0, "num_token_doc": 66.6538, "num_token_overlap": 14.5301, "num_token_query": 37.1672, "num_token_union": 65.3042, "num_word_context": 202.0292, "num_word_doc": 49.7692, "num_word_query": 27.8204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 628.7797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.234, "query_norm": 1.4207, "queue_k_norm": 1.402, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1672, "sent_len_1": 66.6538, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7512, "stdk": 0.0474, "stdq": 0.045, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7741, "doc_norm": 1.3994, "encoder_q-embeddings": 909.5698, "encoder_q-layer.0": 683.4091, "encoder_q-layer.1": 840.1863, "encoder_q-layer.10": 178.1621, "encoder_q-layer.11": 495.1865, "encoder_q-layer.2": 941.3015, "encoder_q-layer.3": 1041.2299, "encoder_q-layer.4": 1042.3652, "encoder_q-layer.5": 947.5065, "encoder_q-layer.6": 793.8994, "encoder_q-layer.7": 702.2231, "encoder_q-layer.8": 430.8093, "encoder_q-layer.9": 181.1703, "epoch": 0.26, "inbatch_neg_score": 0.2342, "inbatch_pos_score": 0.8218, "learning_rate": 4.0777777777777783e-05, "loss": 3.7741, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.6685, "num_token_overlap": 14.6063, "num_token_query": 37.3354, "num_token_union": 65.3586, "num_word_context": 202.0252, "num_word_doc": 49.779, "num_word_query": 27.9705, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1148.44, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2343, "query_norm": 1.3869, "queue_k_norm": 1.4043, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3354, "sent_len_1": 66.6685, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7688, "stdk": 0.0473, "stdq": 0.0439, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.775, "doc_norm": 1.4025, "encoder_q-embeddings": 793.2769, "encoder_q-layer.0": 582.7296, "encoder_q-layer.1": 683.4202, "encoder_q-layer.10": 175.295, "encoder_q-layer.11": 464.2921, "encoder_q-layer.2": 774.15, "encoder_q-layer.3": 881.2292, "encoder_q-layer.4": 833.0485, "encoder_q-layer.5": 743.4492, "encoder_q-layer.6": 746.123, "encoder_q-layer.7": 553.8174, "encoder_q-layer.8": 281.9532, "encoder_q-layer.9": 173.8963, "epoch": 0.26, "inbatch_neg_score": 0.2353, "inbatch_pos_score": 0.8281, "learning_rate": 4.0722222222222226e-05, "loss": 3.775, "norm_diff": 0.0111, "norm_loss": 0.0, "num_token_doc": 66.5194, "num_token_overlap": 14.6331, "num_token_query": 37.4965, "num_token_union": 65.2727, "num_word_context": 202.2406, "num_word_doc": 49.646, "num_word_query": 28.073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 948.0837, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2345, "query_norm": 1.3925, "queue_k_norm": 1.4022, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4965, "sent_len_1": 66.5194, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.96, "stdk": 0.0474, "stdq": 0.0444, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7876, "doc_norm": 1.4144, "encoder_q-embeddings": 5244.0508, "encoder_q-layer.0": 3908.9514, "encoder_q-layer.1": 4085.967, "encoder_q-layer.10": 211.3357, "encoder_q-layer.11": 506.4031, "encoder_q-layer.2": 4667.2021, "encoder_q-layer.3": 5082.5371, "encoder_q-layer.4": 5056.4487, "encoder_q-layer.5": 5326.7847, "encoder_q-layer.6": 6681.8823, "encoder_q-layer.7": 2960.2744, "encoder_q-layer.8": 1028.9119, "encoder_q-layer.9": 252.9684, "epoch": 0.26, "inbatch_neg_score": 0.2346, "inbatch_pos_score": 0.8521, "learning_rate": 4.066666666666667e-05, "loss": 3.7876, "norm_diff": 0.0256, "norm_loss": 0.0, "num_token_doc": 66.9412, "num_token_overlap": 14.5999, "num_token_query": 37.2593, "num_token_union": 65.42, "num_word_context": 202.0495, "num_word_doc": 49.9665, "num_word_query": 27.8768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6138.0191, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2363, "query_norm": 1.3888, "queue_k_norm": 1.403, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2593, "sent_len_1": 66.9412, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7138, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.7593, "doc_norm": 1.4033, "encoder_q-embeddings": 452.1933, "encoder_q-layer.0": 313.1213, "encoder_q-layer.1": 342.15, "encoder_q-layer.10": 167.237, "encoder_q-layer.11": 468.1523, "encoder_q-layer.2": 324.8805, "encoder_q-layer.3": 325.8997, "encoder_q-layer.4": 339.0945, "encoder_q-layer.5": 307.1722, "encoder_q-layer.6": 312.8499, "encoder_q-layer.7": 254.4916, "encoder_q-layer.8": 203.0734, "encoder_q-layer.9": 161.6798, "epoch": 0.26, "inbatch_neg_score": 0.2344, "inbatch_pos_score": 0.8457, "learning_rate": 4.061111111111111e-05, "loss": 3.7593, "norm_diff": 0.0212, "norm_loss": 0.0, "num_token_doc": 66.9146, "num_token_overlap": 14.6604, "num_token_query": 37.512, "num_token_union": 65.51, "num_word_context": 202.7556, "num_word_doc": 49.8746, "num_word_query": 28.0802, "postclip_grad_norm": 1.0, "preclip_grad_norm": 491.243, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2341, "query_norm": 1.3842, "queue_k_norm": 1.4049, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.512, "sent_len_1": 66.9146, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6025, "stdk": 0.0475, "stdq": 0.0441, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.757, "doc_norm": 1.4178, "encoder_q-embeddings": 347.355, "encoder_q-layer.0": 247.7711, "encoder_q-layer.1": 268.0038, "encoder_q-layer.10": 173.8099, "encoder_q-layer.11": 476.0016, "encoder_q-layer.2": 299.6014, "encoder_q-layer.3": 278.014, "encoder_q-layer.4": 278.778, "encoder_q-layer.5": 255.7638, "encoder_q-layer.6": 235.8092, "encoder_q-layer.7": 210.452, "encoder_q-layer.8": 201.5529, "encoder_q-layer.9": 161.5492, "epoch": 0.26, "inbatch_neg_score": 0.2338, "inbatch_pos_score": 0.8145, "learning_rate": 4.055555555555556e-05, "loss": 3.757, "norm_diff": 0.0232, "norm_loss": 0.0, "num_token_doc": 66.9126, "num_token_overlap": 14.5978, "num_token_query": 37.4204, "num_token_union": 65.5502, "num_word_context": 202.347, "num_word_doc": 49.9582, "num_word_query": 27.99, "postclip_grad_norm": 1.0, "preclip_grad_norm": 428.6788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2334, "query_norm": 1.3946, "queue_k_norm": 1.4031, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4204, "sent_len_1": 66.9126, "sent_len_max_0": 128.0, "sent_len_max_1": 188.86, "stdk": 0.048, "stdq": 0.0442, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.7465, "doc_norm": 1.4116, "encoder_q-embeddings": 177.3359, "encoder_q-layer.0": 120.0541, "encoder_q-layer.1": 120.6492, "encoder_q-layer.10": 150.9846, "encoder_q-layer.11": 452.0565, "encoder_q-layer.2": 134.0803, "encoder_q-layer.3": 139.7683, "encoder_q-layer.4": 149.7465, "encoder_q-layer.5": 143.2651, "encoder_q-layer.6": 131.9585, "encoder_q-layer.7": 144.3944, "encoder_q-layer.8": 160.2988, "encoder_q-layer.9": 141.5711, "epoch": 0.26, "inbatch_neg_score": 0.2339, "inbatch_pos_score": 0.8428, "learning_rate": 4.05e-05, "loss": 3.7465, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.7823, "num_token_overlap": 14.6539, "num_token_query": 37.5649, "num_token_union": 65.5041, "num_word_context": 202.4094, "num_word_doc": 49.8289, "num_word_query": 28.1646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 298.5427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2329, "query_norm": 1.3904, "queue_k_norm": 1.405, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5649, "sent_len_1": 66.7823, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.8288, "stdk": 0.0478, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.7752, "doc_norm": 1.3985, "encoder_q-embeddings": 383.9938, "encoder_q-layer.0": 278.2014, "encoder_q-layer.1": 316.6833, "encoder_q-layer.10": 178.2513, "encoder_q-layer.11": 473.3224, "encoder_q-layer.2": 349.6494, "encoder_q-layer.3": 354.1031, "encoder_q-layer.4": 329.716, "encoder_q-layer.5": 271.4368, "encoder_q-layer.6": 232.138, "encoder_q-layer.7": 192.5632, "encoder_q-layer.8": 182.1508, "encoder_q-layer.9": 157.7787, "epoch": 0.27, "inbatch_neg_score": 0.2376, "inbatch_pos_score": 0.8101, "learning_rate": 4.0444444444444444e-05, "loss": 3.7752, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.4515, "num_token_overlap": 14.584, "num_token_query": 37.3275, "num_token_union": 65.1941, "num_word_context": 202.2025, "num_word_doc": 49.5755, "num_word_query": 27.9174, "postclip_grad_norm": 1.0, "preclip_grad_norm": 461.3557, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2351, "query_norm": 1.3826, "queue_k_norm": 1.4031, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3275, "sent_len_1": 66.4515, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.6425, "stdk": 0.0473, "stdq": 0.0434, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7784, "doc_norm": 1.3988, "encoder_q-embeddings": 382.9753, "encoder_q-layer.0": 259.855, "encoder_q-layer.1": 302.584, "encoder_q-layer.10": 161.6988, "encoder_q-layer.11": 453.0594, "encoder_q-layer.2": 363.3654, "encoder_q-layer.3": 415.027, "encoder_q-layer.4": 417.6109, "encoder_q-layer.5": 353.8203, "encoder_q-layer.6": 247.7332, "encoder_q-layer.7": 203.4166, "encoder_q-layer.8": 182.7527, "encoder_q-layer.9": 153.5334, "epoch": 0.27, "inbatch_neg_score": 0.2358, "inbatch_pos_score": 0.8149, "learning_rate": 4.038888888888889e-05, "loss": 3.7784, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.7718, "num_token_overlap": 14.5605, "num_token_query": 37.321, "num_token_union": 65.4073, "num_word_context": 202.2528, "num_word_doc": 49.8335, "num_word_query": 27.9474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 479.7572, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2355, "query_norm": 1.3806, "queue_k_norm": 1.4043, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.321, "sent_len_1": 66.7718, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6887, "stdk": 0.0473, "stdq": 0.0435, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.7656, "doc_norm": 1.3973, "encoder_q-embeddings": 526.2213, "encoder_q-layer.0": 353.4065, "encoder_q-layer.1": 397.7349, "encoder_q-layer.10": 186.938, "encoder_q-layer.11": 523.1843, "encoder_q-layer.2": 465.5736, "encoder_q-layer.3": 521.5166, "encoder_q-layer.4": 476.487, "encoder_q-layer.5": 426.617, "encoder_q-layer.6": 385.1891, "encoder_q-layer.7": 259.5783, "encoder_q-layer.8": 205.567, "encoder_q-layer.9": 175.1472, "epoch": 0.27, "inbatch_neg_score": 0.2416, "inbatch_pos_score": 0.835, "learning_rate": 4.0333333333333336e-05, "loss": 3.7656, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.5692, "num_token_overlap": 14.5405, "num_token_query": 37.4213, "num_token_union": 65.3472, "num_word_context": 202.3505, "num_word_doc": 49.6971, "num_word_query": 28.0457, "postclip_grad_norm": 1.0, "preclip_grad_norm": 605.5233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2421, "query_norm": 1.3951, "queue_k_norm": 1.4018, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4213, "sent_len_1": 66.5692, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.89, "stdk": 0.0472, "stdq": 0.0439, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.7322, "doc_norm": 1.4016, "encoder_q-embeddings": 335.9836, "encoder_q-layer.0": 254.1497, "encoder_q-layer.1": 265.1375, "encoder_q-layer.10": 161.2401, "encoder_q-layer.11": 442.6339, "encoder_q-layer.2": 309.8042, "encoder_q-layer.3": 348.2184, "encoder_q-layer.4": 330.3524, "encoder_q-layer.5": 324.1748, "encoder_q-layer.6": 313.5691, "encoder_q-layer.7": 254.3154, "encoder_q-layer.8": 191.7922, "encoder_q-layer.9": 150.6496, "epoch": 0.27, "inbatch_neg_score": 0.2452, "inbatch_pos_score": 0.8584, "learning_rate": 4.027777777777778e-05, "loss": 3.7322, "norm_diff": 0.0128, "norm_loss": 0.0, "num_token_doc": 66.9948, "num_token_overlap": 14.6041, "num_token_query": 37.3976, "num_token_union": 65.5489, "num_word_context": 203.039, "num_word_doc": 50.0312, "num_word_query": 28.0042, "postclip_grad_norm": 1.0, "preclip_grad_norm": 447.3931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2458, "query_norm": 1.3942, "queue_k_norm": 1.4057, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3976, "sent_len_1": 66.9948, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.8787, "stdk": 0.0474, "stdq": 0.0439, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7658, "doc_norm": 1.4055, "encoder_q-embeddings": 186.7725, "encoder_q-layer.0": 124.6124, "encoder_q-layer.1": 133.8359, "encoder_q-layer.10": 157.0055, "encoder_q-layer.11": 455.7903, "encoder_q-layer.2": 148.0826, "encoder_q-layer.3": 162.85, "encoder_q-layer.4": 154.6267, "encoder_q-layer.5": 141.2454, "encoder_q-layer.6": 164.0383, "encoder_q-layer.7": 156.9758, "encoder_q-layer.8": 174.3349, "encoder_q-layer.9": 148.0264, "epoch": 0.27, "inbatch_neg_score": 0.2382, "inbatch_pos_score": 0.7993, "learning_rate": 4.022222222222222e-05, "loss": 3.7658, "norm_diff": 0.0572, "norm_loss": 0.0, "num_token_doc": 66.4741, "num_token_overlap": 14.55, "num_token_query": 37.2352, "num_token_union": 65.2012, "num_word_context": 202.0298, "num_word_doc": 49.6107, "num_word_query": 27.8748, "postclip_grad_norm": 1.0, "preclip_grad_norm": 312.7648, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2383, "query_norm": 1.3483, "queue_k_norm": 1.4031, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2352, "sent_len_1": 66.4741, "sent_len_max_0": 127.995, "sent_len_max_1": 187.8013, "stdk": 0.0475, "stdq": 0.0426, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7418, "doc_norm": 1.4037, "encoder_q-embeddings": 3656.5862, "encoder_q-layer.0": 2700.041, "encoder_q-layer.1": 3170.5759, "encoder_q-layer.10": 305.6871, "encoder_q-layer.11": 924.5167, "encoder_q-layer.2": 3436.9678, "encoder_q-layer.3": 4313.5757, "encoder_q-layer.4": 4725.4922, "encoder_q-layer.5": 3557.1704, "encoder_q-layer.6": 2049.5947, "encoder_q-layer.7": 1445.9292, "encoder_q-layer.8": 1015.4948, "encoder_q-layer.9": 466.8542, "epoch": 0.27, "inbatch_neg_score": 0.2402, "inbatch_pos_score": 0.7959, "learning_rate": 4.016666666666667e-05, "loss": 3.7418, "norm_diff": 0.0518, "norm_loss": 0.0, "num_token_doc": 66.8288, "num_token_overlap": 14.5629, "num_token_query": 37.224, "num_token_union": 65.3452, "num_word_context": 202.3354, "num_word_doc": 49.8159, "num_word_query": 27.867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4302.6834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2382, "query_norm": 1.352, "queue_k_norm": 1.4052, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.224, "sent_len_1": 66.8288, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.6863, "stdk": 0.0474, "stdq": 0.0429, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.775, "doc_norm": 1.4018, "encoder_q-embeddings": 602.8273, "encoder_q-layer.0": 438.0788, "encoder_q-layer.1": 494.4604, "encoder_q-layer.10": 332.1571, "encoder_q-layer.11": 1005.9028, "encoder_q-layer.2": 524.9398, "encoder_q-layer.3": 529.801, "encoder_q-layer.4": 522.198, "encoder_q-layer.5": 436.8185, "encoder_q-layer.6": 382.4903, "encoder_q-layer.7": 357.9784, "encoder_q-layer.8": 361.9725, "encoder_q-layer.9": 316.8534, "epoch": 0.27, "inbatch_neg_score": 0.232, "inbatch_pos_score": 0.8604, "learning_rate": 4.011111111111111e-05, "loss": 3.775, "norm_diff": 0.0245, "norm_loss": 0.0, "num_token_doc": 66.5578, "num_token_overlap": 14.5695, "num_token_query": 37.3912, "num_token_union": 65.2683, "num_word_context": 202.3359, "num_word_doc": 49.6628, "num_word_query": 27.9881, "postclip_grad_norm": 1.0, "preclip_grad_norm": 816.7831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2335, "query_norm": 1.4049, "queue_k_norm": 1.403, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3912, "sent_len_1": 66.5578, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3663, "stdk": 0.0474, "stdq": 0.0453, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.7443, "doc_norm": 1.4149, "encoder_q-embeddings": 22565.2285, "encoder_q-layer.0": 17270.082, "encoder_q-layer.1": 20409.5293, "encoder_q-layer.10": 327.0836, "encoder_q-layer.11": 886.6036, "encoder_q-layer.2": 24297.8945, "encoder_q-layer.3": 23125.4082, "encoder_q-layer.4": 18979.2227, "encoder_q-layer.5": 15813.4658, "encoder_q-layer.6": 13328.7363, "encoder_q-layer.7": 8244.1836, "encoder_q-layer.8": 4324.499, "encoder_q-layer.9": 983.308, "epoch": 0.27, "inbatch_neg_score": 0.2243, "inbatch_pos_score": 0.8506, "learning_rate": 4.0055555555555554e-05, "loss": 3.7443, "norm_diff": 0.0365, "norm_loss": 0.0, "num_token_doc": 66.868, "num_token_overlap": 14.6046, "num_token_query": 37.4343, "num_token_union": 65.488, "num_word_context": 202.362, "num_word_doc": 49.8917, "num_word_query": 28.0208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 24140.7182, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2261, "query_norm": 1.3784, "queue_k_norm": 1.4035, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4343, "sent_len_1": 66.868, "sent_len_max_0": 127.995, "sent_len_max_1": 190.0412, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.7498, "doc_norm": 1.3954, "encoder_q-embeddings": 806.9235, "encoder_q-layer.0": 568.3561, "encoder_q-layer.1": 601.3807, "encoder_q-layer.10": 305.0584, "encoder_q-layer.11": 926.6163, "encoder_q-layer.2": 673.7494, "encoder_q-layer.3": 697.4526, "encoder_q-layer.4": 700.9543, "encoder_q-layer.5": 618.6317, "encoder_q-layer.6": 517.2843, "encoder_q-layer.7": 453.0885, "encoder_q-layer.8": 424.0621, "encoder_q-layer.9": 331.0207, "epoch": 0.27, "inbatch_neg_score": 0.2162, "inbatch_pos_score": 0.814, "learning_rate": 4e-05, "loss": 3.7498, "norm_diff": 0.0473, "norm_loss": 0.0, "num_token_doc": 66.7893, "num_token_overlap": 14.6005, "num_token_query": 37.2761, "num_token_union": 65.3217, "num_word_context": 202.2595, "num_word_doc": 49.8193, "num_word_query": 27.885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.0481, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2163, "query_norm": 1.3481, "queue_k_norm": 1.4017, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2761, "sent_len_1": 66.7893, "sent_len_max_0": 127.995, "sent_len_max_1": 189.49, "stdk": 0.0472, "stdq": 0.0437, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.75, "doc_norm": 1.4003, "encoder_q-embeddings": 681.4519, "encoder_q-layer.0": 455.621, "encoder_q-layer.1": 553.6397, "encoder_q-layer.10": 349.0757, "encoder_q-layer.11": 983.6428, "encoder_q-layer.2": 669.2405, "encoder_q-layer.3": 704.9963, "encoder_q-layer.4": 681.5891, "encoder_q-layer.5": 520.6181, "encoder_q-layer.6": 454.0807, "encoder_q-layer.7": 391.8589, "encoder_q-layer.8": 415.8964, "encoder_q-layer.9": 337.4961, "epoch": 0.27, "inbatch_neg_score": 0.2203, "inbatch_pos_score": 0.7954, "learning_rate": 3.9944444444444446e-05, "loss": 3.75, "norm_diff": 0.0623, "norm_loss": 0.0, "num_token_doc": 66.8821, "num_token_overlap": 14.5862, "num_token_query": 37.3425, "num_token_union": 65.4992, "num_word_context": 202.377, "num_word_doc": 49.8875, "num_word_query": 27.9321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 888.0533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2203, "query_norm": 1.338, "queue_k_norm": 1.4016, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3425, "sent_len_1": 66.8821, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9888, "stdk": 0.0474, "stdq": 0.0431, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.735, "doc_norm": 1.4009, "encoder_q-embeddings": 1082.5809, "encoder_q-layer.0": 758.2712, "encoder_q-layer.1": 923.0454, "encoder_q-layer.10": 351.6552, "encoder_q-layer.11": 906.9444, "encoder_q-layer.2": 926.528, "encoder_q-layer.3": 991.0192, "encoder_q-layer.4": 899.3187, "encoder_q-layer.5": 648.1127, "encoder_q-layer.6": 642.3962, "encoder_q-layer.7": 557.4304, "encoder_q-layer.8": 482.8574, "encoder_q-layer.9": 330.5477, "epoch": 0.28, "inbatch_neg_score": 0.2151, "inbatch_pos_score": 0.8301, "learning_rate": 3.9888888888888895e-05, "loss": 3.735, "norm_diff": 0.0409, "norm_loss": 0.0, "num_token_doc": 66.6061, "num_token_overlap": 14.6363, "num_token_query": 37.3915, "num_token_union": 65.3079, "num_word_context": 202.3125, "num_word_doc": 49.7025, "num_word_query": 28.0269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1186.0764, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2163, "query_norm": 1.36, "queue_k_norm": 1.399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3915, "sent_len_1": 66.6061, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0, "stdk": 0.0475, "stdq": 0.044, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.7402, "doc_norm": 1.403, "encoder_q-embeddings": 1006.1425, "encoder_q-layer.0": 716.0162, "encoder_q-layer.1": 805.2888, "encoder_q-layer.10": 317.1721, "encoder_q-layer.11": 903.1122, "encoder_q-layer.2": 921.0303, "encoder_q-layer.3": 939.7803, "encoder_q-layer.4": 938.1648, "encoder_q-layer.5": 819.0337, "encoder_q-layer.6": 610.1057, "encoder_q-layer.7": 471.5508, "encoder_q-layer.8": 381.7111, "encoder_q-layer.9": 300.5254, "epoch": 0.28, "inbatch_neg_score": 0.2193, "inbatch_pos_score": 0.8057, "learning_rate": 3.983333333333333e-05, "loss": 3.7402, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 66.7562, "num_token_overlap": 14.6201, "num_token_query": 37.3392, "num_token_union": 65.3262, "num_word_context": 202.2426, "num_word_doc": 49.8349, "num_word_query": 27.9649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1144.5157, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2202, "query_norm": 1.3538, "queue_k_norm": 1.4015, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3392, "sent_len_1": 66.7562, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.5987, "stdk": 0.0475, "stdq": 0.0437, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.7462, "doc_norm": 1.3982, "encoder_q-embeddings": 927.2849, "encoder_q-layer.0": 733.2581, "encoder_q-layer.1": 805.3002, "encoder_q-layer.10": 335.8928, "encoder_q-layer.11": 915.9498, "encoder_q-layer.2": 885.3932, "encoder_q-layer.3": 945.6163, "encoder_q-layer.4": 1044.3455, "encoder_q-layer.5": 845.2399, "encoder_q-layer.6": 910.3299, "encoder_q-layer.7": 790.8897, "encoder_q-layer.8": 536.339, "encoder_q-layer.9": 354.0056, "epoch": 0.28, "inbatch_neg_score": 0.2186, "inbatch_pos_score": 0.8438, "learning_rate": 3.977777777777778e-05, "loss": 3.7462, "norm_diff": 0.026, "norm_loss": 0.0, "num_token_doc": 66.8667, "num_token_overlap": 14.5501, "num_token_query": 37.282, "num_token_union": 65.4358, "num_word_context": 202.2667, "num_word_doc": 49.9084, "num_word_query": 27.9103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1197.1013, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2177, "query_norm": 1.3722, "queue_k_norm": 1.3998, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.282, "sent_len_1": 66.8667, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.0012, "stdk": 0.0474, "stdq": 0.0444, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7674, "doc_norm": 1.4038, "encoder_q-embeddings": 702.0554, "encoder_q-layer.0": 518.6885, "encoder_q-layer.1": 655.4797, "encoder_q-layer.10": 322.7258, "encoder_q-layer.11": 906.0331, "encoder_q-layer.2": 776.3779, "encoder_q-layer.3": 927.2576, "encoder_q-layer.4": 954.7088, "encoder_q-layer.5": 798.5094, "encoder_q-layer.6": 716.1671, "encoder_q-layer.7": 526.6554, "encoder_q-layer.8": 411.7628, "encoder_q-layer.9": 321.3179, "epoch": 0.28, "inbatch_neg_score": 0.2137, "inbatch_pos_score": 0.8164, "learning_rate": 3.972222222222222e-05, "loss": 3.7674, "norm_diff": 0.051, "norm_loss": 0.0, "num_token_doc": 66.9102, "num_token_overlap": 14.6094, "num_token_query": 37.3208, "num_token_union": 65.3845, "num_word_context": 202.3892, "num_word_doc": 49.8806, "num_word_query": 27.9594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1054.077, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2126, "query_norm": 1.3528, "queue_k_norm": 1.3998, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3208, "sent_len_1": 66.9102, "sent_len_max_0": 127.9887, "sent_len_max_1": 193.1825, "stdk": 0.0476, "stdq": 0.0438, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7373, "doc_norm": 1.3928, "encoder_q-embeddings": 1875.1453, "encoder_q-layer.0": 1391.3239, "encoder_q-layer.1": 1501.8092, "encoder_q-layer.10": 406.6234, "encoder_q-layer.11": 1001.6012, "encoder_q-layer.2": 1500.2802, "encoder_q-layer.3": 1519.0411, "encoder_q-layer.4": 1545.1016, "encoder_q-layer.5": 1037.769, "encoder_q-layer.6": 615.616, "encoder_q-layer.7": 511.5301, "encoder_q-layer.8": 447.6909, "encoder_q-layer.9": 362.6552, "epoch": 0.28, "inbatch_neg_score": 0.212, "inbatch_pos_score": 0.8091, "learning_rate": 3.966666666666667e-05, "loss": 3.7373, "norm_diff": 0.0197, "norm_loss": 0.0, "num_token_doc": 66.7935, "num_token_overlap": 14.6413, "num_token_query": 37.3714, "num_token_union": 65.3709, "num_word_context": 202.2218, "num_word_doc": 49.8479, "num_word_query": 27.9735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1820.3489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2124, "query_norm": 1.3745, "queue_k_norm": 1.3978, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3714, "sent_len_1": 66.7935, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2038, "stdk": 0.0472, "stdq": 0.0446, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7765, "doc_norm": 1.3948, "encoder_q-embeddings": 1051.7068, "encoder_q-layer.0": 751.9481, "encoder_q-layer.1": 849.5494, "encoder_q-layer.10": 326.2552, "encoder_q-layer.11": 978.094, "encoder_q-layer.2": 963.4182, "encoder_q-layer.3": 779.8195, "encoder_q-layer.4": 649.8201, "encoder_q-layer.5": 493.057, "encoder_q-layer.6": 352.9358, "encoder_q-layer.7": 324.788, "encoder_q-layer.8": 331.2432, "encoder_q-layer.9": 296.1454, "epoch": 0.28, "inbatch_neg_score": 0.2043, "inbatch_pos_score": 0.7944, "learning_rate": 3.961111111111111e-05, "loss": 3.7765, "norm_diff": 0.0391, "norm_loss": 0.0, "num_token_doc": 66.5685, "num_token_overlap": 14.4936, "num_token_query": 37.136, "num_token_union": 65.2101, "num_word_context": 202.148, "num_word_doc": 49.6434, "num_word_query": 27.7843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1087.8043, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2045, "query_norm": 1.3558, "queue_k_norm": 1.396, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.136, "sent_len_1": 66.5685, "sent_len_max_0": 128.0, "sent_len_max_1": 188.59, "stdk": 0.0473, "stdq": 0.044, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.7611, "doc_norm": 1.3873, "encoder_q-embeddings": 1147.8458, "encoder_q-layer.0": 854.7409, "encoder_q-layer.1": 926.334, "encoder_q-layer.10": 363.3953, "encoder_q-layer.11": 932.2252, "encoder_q-layer.2": 1018.4482, "encoder_q-layer.3": 1054.1326, "encoder_q-layer.4": 890.3224, "encoder_q-layer.5": 725.0118, "encoder_q-layer.6": 597.6711, "encoder_q-layer.7": 509.5409, "encoder_q-layer.8": 432.9655, "encoder_q-layer.9": 312.3435, "epoch": 0.28, "inbatch_neg_score": 0.2177, "inbatch_pos_score": 0.792, "learning_rate": 3.9555555555555556e-05, "loss": 3.7611, "norm_diff": 0.0289, "norm_loss": 0.0, "num_token_doc": 66.7578, "num_token_overlap": 14.5238, "num_token_query": 37.1219, "num_token_union": 65.2777, "num_word_context": 202.3682, "num_word_doc": 49.848, "num_word_query": 27.7866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1239.1021, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2167, "query_norm": 1.3584, "queue_k_norm": 1.3982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1219, "sent_len_1": 66.7578, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8388, "stdk": 0.047, "stdq": 0.0438, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.7371, "doc_norm": 1.399, "encoder_q-embeddings": 347.1877, "encoder_q-layer.0": 238.3668, "encoder_q-layer.1": 262.2391, "encoder_q-layer.10": 324.2533, "encoder_q-layer.11": 916.1187, "encoder_q-layer.2": 277.5135, "encoder_q-layer.3": 279.2618, "encoder_q-layer.4": 287.5574, "encoder_q-layer.5": 277.8578, "encoder_q-layer.6": 315.4364, "encoder_q-layer.7": 280.7004, "encoder_q-layer.8": 308.7258, "encoder_q-layer.9": 278.9507, "epoch": 0.28, "inbatch_neg_score": 0.228, "inbatch_pos_score": 0.8213, "learning_rate": 3.9500000000000005e-05, "loss": 3.7371, "norm_diff": 0.0461, "norm_loss": 0.0, "num_token_doc": 66.7227, "num_token_overlap": 14.504, "num_token_query": 37.0792, "num_token_union": 65.2026, "num_word_context": 201.8629, "num_word_doc": 49.7578, "num_word_query": 27.7267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 608.4272, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2269, "query_norm": 1.353, "queue_k_norm": 1.3964, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.0792, "sent_len_1": 66.7227, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.06, "stdk": 0.0475, "stdq": 0.0432, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.7296, "doc_norm": 1.399, "encoder_q-embeddings": 6950.6504, "encoder_q-layer.0": 5808.3032, "encoder_q-layer.1": 6131.9771, "encoder_q-layer.10": 341.0746, "encoder_q-layer.11": 920.1716, "encoder_q-layer.2": 6310.8037, "encoder_q-layer.3": 5888.0679, "encoder_q-layer.4": 5784.5679, "encoder_q-layer.5": 5767.2393, "encoder_q-layer.6": 3772.2676, "encoder_q-layer.7": 3018.0171, "encoder_q-layer.8": 1748.9196, "encoder_q-layer.9": 479.8923, "epoch": 0.28, "inbatch_neg_score": 0.2286, "inbatch_pos_score": 0.8257, "learning_rate": 3.944444444444445e-05, "loss": 3.7296, "norm_diff": 0.0457, "norm_loss": 0.0, "num_token_doc": 66.5997, "num_token_overlap": 14.5785, "num_token_query": 37.3012, "num_token_union": 65.2793, "num_word_context": 202.2443, "num_word_doc": 49.6932, "num_word_query": 27.9077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7367.7284, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2283, "query_norm": 1.3533, "queue_k_norm": 1.3971, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3012, "sent_len_1": 66.5997, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2675, "stdk": 0.0475, "stdq": 0.0436, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7069, "doc_norm": 1.3891, "encoder_q-embeddings": 918.4783, "encoder_q-layer.0": 672.528, "encoder_q-layer.1": 699.9163, "encoder_q-layer.10": 336.5942, "encoder_q-layer.11": 1004.7212, "encoder_q-layer.2": 778.2334, "encoder_q-layer.3": 763.4218, "encoder_q-layer.4": 800.5197, "encoder_q-layer.5": 694.9092, "encoder_q-layer.6": 618.7296, "encoder_q-layer.7": 596.961, "encoder_q-layer.8": 539.1018, "encoder_q-layer.9": 331.324, "epoch": 0.28, "inbatch_neg_score": 0.2308, "inbatch_pos_score": 0.8154, "learning_rate": 3.938888888888889e-05, "loss": 3.7069, "norm_diff": 0.0259, "norm_loss": 0.0, "num_token_doc": 66.7968, "num_token_overlap": 14.6747, "num_token_query": 37.4682, "num_token_union": 65.3833, "num_word_context": 202.3665, "num_word_doc": 49.837, "num_word_query": 28.0482, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1080.0843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2305, "query_norm": 1.365, "queue_k_norm": 1.3969, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4682, "sent_len_1": 66.7968, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1438, "stdk": 0.0471, "stdq": 0.044, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.6981, "doc_norm": 1.3927, "encoder_q-embeddings": 1241.3708, "encoder_q-layer.0": 923.1594, "encoder_q-layer.1": 1080.3339, "encoder_q-layer.10": 372.1757, "encoder_q-layer.11": 964.8303, "encoder_q-layer.2": 1236.6289, "encoder_q-layer.3": 1002.1147, "encoder_q-layer.4": 848.9004, "encoder_q-layer.5": 687.0924, "encoder_q-layer.6": 599.1821, "encoder_q-layer.7": 477.2012, "encoder_q-layer.8": 407.6246, "encoder_q-layer.9": 342.8034, "epoch": 0.29, "inbatch_neg_score": 0.2481, "inbatch_pos_score": 0.8164, "learning_rate": 3.933333333333333e-05, "loss": 3.6981, "norm_diff": 0.0239, "norm_loss": 0.0, "num_token_doc": 66.7928, "num_token_overlap": 14.6182, "num_token_query": 37.2996, "num_token_union": 65.3513, "num_word_context": 202.2429, "num_word_doc": 49.8632, "num_word_query": 27.9363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1318.3395, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2466, "query_norm": 1.3761, "queue_k_norm": 1.3986, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2996, "sent_len_1": 66.7928, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7138, "stdk": 0.0472, "stdq": 0.0441, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.7298, "doc_norm": 1.3994, "encoder_q-embeddings": 612.4391, "encoder_q-layer.0": 431.7263, "encoder_q-layer.1": 498.2053, "encoder_q-layer.10": 312.5091, "encoder_q-layer.11": 902.2626, "encoder_q-layer.2": 526.0432, "encoder_q-layer.3": 440.2318, "encoder_q-layer.4": 408.9169, "encoder_q-layer.5": 375.5644, "encoder_q-layer.6": 339.9866, "encoder_q-layer.7": 338.9166, "encoder_q-layer.8": 332.3265, "encoder_q-layer.9": 302.8888, "epoch": 0.29, "inbatch_neg_score": 0.2483, "inbatch_pos_score": 0.8721, "learning_rate": 3.927777777777778e-05, "loss": 3.7298, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.6355, "num_token_overlap": 14.6455, "num_token_query": 37.4536, "num_token_union": 65.3377, "num_word_context": 202.0134, "num_word_doc": 49.6887, "num_word_query": 28.0474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 752.0305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.249, "query_norm": 1.3791, "queue_k_norm": 1.4001, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4536, "sent_len_1": 66.6355, "sent_len_max_0": 128.0, "sent_len_max_1": 189.39, "stdk": 0.0475, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.7429, "doc_norm": 1.3973, "encoder_q-embeddings": 495.3182, "encoder_q-layer.0": 353.052, "encoder_q-layer.1": 379.5872, "encoder_q-layer.10": 312.2405, "encoder_q-layer.11": 924.5825, "encoder_q-layer.2": 461.9003, "encoder_q-layer.3": 492.8619, "encoder_q-layer.4": 500.2009, "encoder_q-layer.5": 428.091, "encoder_q-layer.6": 375.2621, "encoder_q-layer.7": 399.1958, "encoder_q-layer.8": 371.1823, "encoder_q-layer.9": 298.0144, "epoch": 0.29, "inbatch_neg_score": 0.2529, "inbatch_pos_score": 0.8442, "learning_rate": 3.922222222222223e-05, "loss": 3.7429, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 66.9057, "num_token_overlap": 14.5626, "num_token_query": 37.2012, "num_token_union": 65.357, "num_word_context": 202.302, "num_word_doc": 49.9166, "num_word_query": 27.8501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 742.1819, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2524, "query_norm": 1.3632, "queue_k_norm": 1.4014, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2012, "sent_len_1": 66.9057, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.345, "stdk": 0.0473, "stdq": 0.0434, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.7528, "doc_norm": 1.3965, "encoder_q-embeddings": 1881.4512, "encoder_q-layer.0": 1224.7375, "encoder_q-layer.1": 1398.3455, "encoder_q-layer.10": 340.5239, "encoder_q-layer.11": 916.5464, "encoder_q-layer.2": 1727.064, "encoder_q-layer.3": 1781.4009, "encoder_q-layer.4": 1983.4995, "encoder_q-layer.5": 2045.2622, "encoder_q-layer.6": 1320.8494, "encoder_q-layer.7": 803.8198, "encoder_q-layer.8": 490.6811, "encoder_q-layer.9": 310.9687, "epoch": 0.29, "inbatch_neg_score": 0.2522, "inbatch_pos_score": 0.8398, "learning_rate": 3.9166666666666665e-05, "loss": 3.7528, "norm_diff": 0.0317, "norm_loss": 0.0, "num_token_doc": 66.8132, "num_token_overlap": 14.5305, "num_token_query": 37.2006, "num_token_union": 65.3354, "num_word_context": 202.305, "num_word_doc": 49.857, "num_word_query": 27.8289, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2105.0811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2522, "query_norm": 1.3648, "queue_k_norm": 1.4018, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2006, "sent_len_1": 66.8132, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.9787, "stdk": 0.0473, "stdq": 0.0433, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.7277, "doc_norm": 1.3966, "encoder_q-embeddings": 1470.838, "encoder_q-layer.0": 1040.418, "encoder_q-layer.1": 1162.194, "encoder_q-layer.10": 313.1902, "encoder_q-layer.11": 848.6396, "encoder_q-layer.2": 1239.3348, "encoder_q-layer.3": 1433.9017, "encoder_q-layer.4": 1564.1981, "encoder_q-layer.5": 1490.8872, "encoder_q-layer.6": 1367.4697, "encoder_q-layer.7": 1241.7686, "encoder_q-layer.8": 903.4023, "encoder_q-layer.9": 346.954, "epoch": 0.29, "inbatch_neg_score": 0.2576, "inbatch_pos_score": 0.8643, "learning_rate": 3.9111111111111115e-05, "loss": 3.7277, "norm_diff": 0.0203, "norm_loss": 0.0, "num_token_doc": 66.775, "num_token_overlap": 14.5457, "num_token_query": 37.156, "num_token_union": 65.3385, "num_word_context": 202.1657, "num_word_doc": 49.8664, "num_word_query": 27.8031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1760.9813, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2588, "query_norm": 1.3918, "queue_k_norm": 1.4035, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.156, "sent_len_1": 66.775, "sent_len_max_0": 127.9925, "sent_len_max_1": 187.5525, "stdk": 0.0473, "stdq": 0.0444, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.7315, "doc_norm": 1.4082, "encoder_q-embeddings": 5547.7334, "encoder_q-layer.0": 3982.2747, "encoder_q-layer.1": 4442.4106, "encoder_q-layer.10": 693.8922, "encoder_q-layer.11": 1858.0032, "encoder_q-layer.2": 4912.4663, "encoder_q-layer.3": 5378.5059, "encoder_q-layer.4": 6574.8921, "encoder_q-layer.5": 3801.1682, "encoder_q-layer.6": 1745.5105, "encoder_q-layer.7": 1433.6488, "encoder_q-layer.8": 1088.718, "encoder_q-layer.9": 730.2958, "epoch": 0.29, "inbatch_neg_score": 0.2564, "inbatch_pos_score": 0.8369, "learning_rate": 3.905555555555556e-05, "loss": 3.7315, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.8093, "num_token_overlap": 14.5915, "num_token_query": 37.439, "num_token_union": 65.4815, "num_word_context": 202.4697, "num_word_doc": 49.844, "num_word_query": 28.0278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5894.8379, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2546, "query_norm": 1.3913, "queue_k_norm": 1.4054, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.439, "sent_len_1": 66.8093, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.25, "stdk": 0.0476, "stdq": 0.0442, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7293, "doc_norm": 1.399, "encoder_q-embeddings": 2435.1287, "encoder_q-layer.0": 1683.533, "encoder_q-layer.1": 1943.4103, "encoder_q-layer.10": 683.6046, "encoder_q-layer.11": 1868.2404, "encoder_q-layer.2": 1877.1974, "encoder_q-layer.3": 1990.8479, "encoder_q-layer.4": 2016.1162, "encoder_q-layer.5": 1625.043, "encoder_q-layer.6": 1320.2107, "encoder_q-layer.7": 1053.3256, "encoder_q-layer.8": 889.1006, "encoder_q-layer.9": 643.7629, "epoch": 0.29, "inbatch_neg_score": 0.2567, "inbatch_pos_score": 0.8516, "learning_rate": 3.9000000000000006e-05, "loss": 3.7293, "norm_diff": 0.0203, "norm_loss": 0.0, "num_token_doc": 66.6789, "num_token_overlap": 14.6508, "num_token_query": 37.5182, "num_token_union": 65.3754, "num_word_context": 202.2232, "num_word_doc": 49.7364, "num_word_query": 28.0827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2519.3831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2554, "query_norm": 1.3945, "queue_k_norm": 1.4048, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5182, "sent_len_1": 66.6789, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3537, "stdk": 0.0473, "stdq": 0.0443, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7095, "doc_norm": 1.4038, "encoder_q-embeddings": 1762.0017, "encoder_q-layer.0": 1238.6462, "encoder_q-layer.1": 1342.2914, "encoder_q-layer.10": 615.1637, "encoder_q-layer.11": 1776.1655, "encoder_q-layer.2": 1535.3246, "encoder_q-layer.3": 1485.6418, "encoder_q-layer.4": 1510.5131, "encoder_q-layer.5": 1356.5284, "encoder_q-layer.6": 1150.9092, "encoder_q-layer.7": 906.3605, "encoder_q-layer.8": 733.4558, "encoder_q-layer.9": 604.3278, "epoch": 0.29, "inbatch_neg_score": 0.2545, "inbatch_pos_score": 0.8374, "learning_rate": 3.894444444444444e-05, "loss": 3.7095, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.8479, "num_token_overlap": 14.5754, "num_token_query": 37.2963, "num_token_union": 65.4495, "num_word_context": 202.5839, "num_word_doc": 49.8796, "num_word_query": 27.9023, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1994.0843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2544, "query_norm": 1.3789, "queue_k_norm": 1.4073, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2963, "sent_len_1": 66.8479, "sent_len_max_0": 127.9788, "sent_len_max_1": 190.0375, "stdk": 0.0475, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7455, "doc_norm": 1.3994, "encoder_q-embeddings": 1054.6774, "encoder_q-layer.0": 725.9211, "encoder_q-layer.1": 800.2158, "encoder_q-layer.10": 612.3238, "encoder_q-layer.11": 1767.1329, "encoder_q-layer.2": 917.8036, "encoder_q-layer.3": 907.7063, "encoder_q-layer.4": 954.791, "encoder_q-layer.5": 903.9363, "encoder_q-layer.6": 866.2923, "encoder_q-layer.7": 696.2788, "encoder_q-layer.8": 766.0924, "encoder_q-layer.9": 610.7591, "epoch": 0.29, "inbatch_neg_score": 0.2577, "inbatch_pos_score": 0.8257, "learning_rate": 3.888888888888889e-05, "loss": 3.7455, "norm_diff": 0.0291, "norm_loss": 0.0, "num_token_doc": 66.6366, "num_token_overlap": 14.576, "num_token_query": 37.305, "num_token_union": 65.2916, "num_word_context": 202.0063, "num_word_doc": 49.7382, "num_word_query": 27.9135, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1448.0091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2568, "query_norm": 1.3721, "queue_k_norm": 1.4084, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.305, "sent_len_1": 66.6366, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.0737, "stdk": 0.0473, "stdq": 0.0435, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 27.7519, "dev_samples_per_second": 2.306, "dev_steps_per_second": 0.036, "epoch": 0.29, "step": 30000, "test_accuracy": 92.37060546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.45720285177230835, "test_doc_norm": 1.3762460947036743, "test_inbatch_neg_score": 0.5709078907966614, "test_inbatch_pos_score": 1.450791597366333, "test_loss": 0.45720285177230835, "test_loss_align": 1.169838786125183, "test_loss_unif": 3.8636064529418945, "test_loss_unif_q@queue": 3.8636066913604736, "test_norm_diff": 0.08786547183990479, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.24534200131893158, "test_query_norm": 1.464111566543579, "test_queue_k_norm": 1.4077144861221313, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04143323004245758, "test_stdq": 0.04197421669960022, "test_stdqueue_k": 0.047672074288129807, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.7519, "dev_samples_per_second": 2.306, "dev_steps_per_second": 0.036, "epoch": 0.29, "eval_beir-arguana_ndcg@10": 0.34455, "eval_beir-arguana_recall@10": 0.57824, "eval_beir-arguana_recall@100": 0.87838, "eval_beir-arguana_recall@20": 0.69559, "eval_beir-avg_ndcg@10": 0.33085491666666667, "eval_beir-avg_recall@10": 0.3959799166666667, "eval_beir-avg_recall@100": 0.5728220833333333, "eval_beir-avg_recall@20": 0.45260749999999994, "eval_beir-cqadupstack_ndcg@10": 0.2327691666666667, "eval_beir-cqadupstack_recall@10": 0.31599916666666666, "eval_beir-cqadupstack_recall@100": 0.5375108333333333, "eval_beir-cqadupstack_recall@20": 0.38012500000000005, "eval_beir-fiqa_ndcg@10": 0.18625, "eval_beir-fiqa_recall@10": 0.23136, "eval_beir-fiqa_recall@100": 0.46472, "eval_beir-fiqa_recall@20": 0.29647, "eval_beir-nfcorpus_ndcg@10": 0.27006, "eval_beir-nfcorpus_recall@10": 0.1291, "eval_beir-nfcorpus_recall@100": 0.25179, "eval_beir-nfcorpus_recall@20": 0.15662, "eval_beir-nq_ndcg@10": 0.21475, "eval_beir-nq_recall@10": 0.36119, "eval_beir-nq_recall@100": 0.68888, "eval_beir-nq_recall@20": 0.47506, "eval_beir-quora_ndcg@10": 0.69975, "eval_beir-quora_recall@10": 0.82506, "eval_beir-quora_recall@100": 0.95808, "eval_beir-quora_recall@20": 0.88042, "eval_beir-scidocs_ndcg@10": 0.12831, "eval_beir-scidocs_recall@10": 0.13662, "eval_beir-scidocs_recall@100": 0.31878, "eval_beir-scidocs_recall@20": 0.18712, "eval_beir-scifact_ndcg@10": 0.57955, "eval_beir-scifact_recall@10": 0.73733, "eval_beir-scifact_recall@100": 0.88656, "eval_beir-scifact_recall@20": 0.78328, "eval_beir-trec-covid_ndcg@10": 0.49924, "eval_beir-trec-covid_recall@10": 0.534, "eval_beir-trec-covid_recall@100": 0.3682, "eval_beir-trec-covid_recall@20": 0.49, "eval_beir-webis-touche2020_ndcg@10": 0.15332, "eval_beir-webis-touche2020_recall@10": 0.1109, "eval_beir-webis-touche2020_recall@100": 0.37532, "eval_beir-webis-touche2020_recall@20": 0.18139, "eval_senteval-avg_sts": 0.7421762619522139, "eval_senteval-sickr_spearman": 0.716071291541291, "eval_senteval-stsb_spearman": 0.7682812323631368, "step": 30000, "test_accuracy": 92.37060546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.45720285177230835, "test_doc_norm": 1.3762460947036743, "test_inbatch_neg_score": 0.5709078907966614, "test_inbatch_pos_score": 1.450791597366333, "test_loss": 0.45720285177230835, "test_loss_align": 1.169838786125183, "test_loss_unif": 3.8636064529418945, "test_loss_unif_q@queue": 3.8636066913604736, "test_norm_diff": 0.08786547183990479, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.24534200131893158, "test_query_norm": 1.464111566543579, "test_queue_k_norm": 1.4077144861221313, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04143323004245758, "test_stdq": 0.04197421669960022, "test_stdqueue_k": 0.047672074288129807, "test_stdqueue_q": 0.0 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.7325, "doc_norm": 1.4061, "encoder_q-embeddings": 12012.1982, "encoder_q-layer.0": 8507.1836, "encoder_q-layer.1": 10335.3994, "encoder_q-layer.10": 649.2462, "encoder_q-layer.11": 1736.5101, "encoder_q-layer.2": 10288.5791, "encoder_q-layer.3": 9399.3584, "encoder_q-layer.4": 9113.9082, "encoder_q-layer.5": 6425.5996, "encoder_q-layer.6": 6711.167, "encoder_q-layer.7": 5750.5571, "encoder_q-layer.8": 3577.3503, "encoder_q-layer.9": 1187.4668, "epoch": 0.29, "inbatch_neg_score": 0.2576, "inbatch_pos_score": 0.894, "learning_rate": 3.883333333333333e-05, "loss": 3.7325, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.9721, "num_token_overlap": 14.5618, "num_token_query": 37.2989, "num_token_union": 65.51, "num_word_context": 202.6147, "num_word_doc": 49.9669, "num_word_query": 27.916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11745.5077, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2578, "query_norm": 1.3944, "queue_k_norm": 1.4088, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2989, "sent_len_1": 66.9721, "sent_len_max_0": 127.985, "sent_len_max_1": 189.3613, "stdk": 0.0475, "stdq": 0.0444, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.7259, "doc_norm": 1.4104, "encoder_q-embeddings": 3075.3862, "encoder_q-layer.0": 2246.5579, "encoder_q-layer.1": 2744.5083, "encoder_q-layer.10": 692.1395, "encoder_q-layer.11": 1795.4606, "encoder_q-layer.2": 3073.469, "encoder_q-layer.3": 3120.2756, "encoder_q-layer.4": 3025.1169, "encoder_q-layer.5": 2779.0122, "encoder_q-layer.6": 2117.084, "encoder_q-layer.7": 1258.0916, "encoder_q-layer.8": 819.2655, "encoder_q-layer.9": 637.6677, "epoch": 0.29, "inbatch_neg_score": 0.2603, "inbatch_pos_score": 0.8633, "learning_rate": 3.877777777777778e-05, "loss": 3.7259, "norm_diff": 0.0519, "norm_loss": 0.0, "num_token_doc": 66.9788, "num_token_overlap": 14.6439, "num_token_query": 37.5225, "num_token_union": 65.5961, "num_word_context": 202.5798, "num_word_doc": 49.9561, "num_word_query": 28.0897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3543.8353, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.3585, "queue_k_norm": 1.4089, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5225, "sent_len_1": 66.9788, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7463, "stdk": 0.0476, "stdq": 0.0433, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7111, "doc_norm": 1.4072, "encoder_q-embeddings": 1986.9327, "encoder_q-layer.0": 1439.6813, "encoder_q-layer.1": 1530.5879, "encoder_q-layer.10": 609.5375, "encoder_q-layer.11": 1718.8684, "encoder_q-layer.2": 1818.8977, "encoder_q-layer.3": 1751.2324, "encoder_q-layer.4": 1619.9899, "encoder_q-layer.5": 1250.0322, "encoder_q-layer.6": 1054.8378, "encoder_q-layer.7": 798.4268, "encoder_q-layer.8": 718.1832, "encoder_q-layer.9": 604.8148, "epoch": 0.3, "inbatch_neg_score": 0.2595, "inbatch_pos_score": 0.8501, "learning_rate": 3.8722222222222225e-05, "loss": 3.7111, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.7785, "num_token_overlap": 14.5978, "num_token_query": 37.3508, "num_token_union": 65.3633, "num_word_context": 202.3064, "num_word_doc": 49.8553, "num_word_query": 27.9749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2150.1843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.259, "query_norm": 1.3688, "queue_k_norm": 1.4107, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3508, "sent_len_1": 66.7785, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5163, "stdk": 0.0475, "stdq": 0.0438, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7357, "doc_norm": 1.4183, "encoder_q-embeddings": 800.1881, "encoder_q-layer.0": 568.601, "encoder_q-layer.1": 593.0872, "encoder_q-layer.10": 636.9007, "encoder_q-layer.11": 1702.4344, "encoder_q-layer.2": 628.8918, "encoder_q-layer.3": 661.9668, "encoder_q-layer.4": 705.7876, "encoder_q-layer.5": 611.6169, "encoder_q-layer.6": 663.46, "encoder_q-layer.7": 636.3138, "encoder_q-layer.8": 660.8324, "encoder_q-layer.9": 564.0228, "epoch": 0.3, "inbatch_neg_score": 0.2524, "inbatch_pos_score": 0.8589, "learning_rate": 3.866666666666667e-05, "loss": 3.7357, "norm_diff": 0.0421, "norm_loss": 0.0, "num_token_doc": 66.7546, "num_token_overlap": 14.6065, "num_token_query": 37.2608, "num_token_union": 65.2881, "num_word_context": 202.0061, "num_word_doc": 49.7392, "num_word_query": 27.8715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.0616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.251, "query_norm": 1.3761, "queue_k_norm": 1.4105, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2608, "sent_len_1": 66.7546, "sent_len_max_0": 127.9737, "sent_len_max_1": 191.6925, "stdk": 0.0479, "stdq": 0.0442, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.7055, "doc_norm": 1.4072, "encoder_q-embeddings": 1330.9532, "encoder_q-layer.0": 965.2649, "encoder_q-layer.1": 1099.6769, "encoder_q-layer.10": 617.8799, "encoder_q-layer.11": 1837.1362, "encoder_q-layer.2": 1017.2566, "encoder_q-layer.3": 997.9353, "encoder_q-layer.4": 1154.9877, "encoder_q-layer.5": 1195.2966, "encoder_q-layer.6": 1064.5356, "encoder_q-layer.7": 699.0326, "encoder_q-layer.8": 717.2238, "encoder_q-layer.9": 589.8809, "epoch": 0.3, "inbatch_neg_score": 0.2466, "inbatch_pos_score": 0.856, "learning_rate": 3.8611111111111116e-05, "loss": 3.7055, "norm_diff": 0.0323, "norm_loss": 0.0, "num_token_doc": 66.7407, "num_token_overlap": 14.6589, "num_token_query": 37.3582, "num_token_union": 65.2973, "num_word_context": 202.1317, "num_word_doc": 49.7998, "num_word_query": 27.9867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1654.6257, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.245, "query_norm": 1.3749, "queue_k_norm": 1.4138, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3582, "sent_len_1": 66.7407, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.6387, "stdk": 0.0475, "stdq": 0.0443, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7084, "doc_norm": 1.4118, "encoder_q-embeddings": 1492.7881, "encoder_q-layer.0": 1029.0188, "encoder_q-layer.1": 1112.1323, "encoder_q-layer.10": 663.0717, "encoder_q-layer.11": 1861.5741, "encoder_q-layer.2": 1309.2859, "encoder_q-layer.3": 1304.9869, "encoder_q-layer.4": 1453.5098, "encoder_q-layer.5": 1304.8005, "encoder_q-layer.6": 1129.1129, "encoder_q-layer.7": 864.4272, "encoder_q-layer.8": 813.9269, "encoder_q-layer.9": 644.1134, "epoch": 0.3, "inbatch_neg_score": 0.2504, "inbatch_pos_score": 0.8491, "learning_rate": 3.855555555555556e-05, "loss": 3.7084, "norm_diff": 0.0253, "norm_loss": 0.0, "num_token_doc": 66.7479, "num_token_overlap": 14.5993, "num_token_query": 37.4752, "num_token_union": 65.4654, "num_word_context": 202.3675, "num_word_doc": 49.8466, "num_word_query": 28.0679, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1830.4143, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2493, "query_norm": 1.3876, "queue_k_norm": 1.4121, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4752, "sent_len_1": 66.7479, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4338, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.7181, "doc_norm": 1.4078, "encoder_q-embeddings": 818.9613, "encoder_q-layer.0": 588.7089, "encoder_q-layer.1": 623.8873, "encoder_q-layer.10": 640.0197, "encoder_q-layer.11": 1916.3989, "encoder_q-layer.2": 637.5864, "encoder_q-layer.3": 605.2237, "encoder_q-layer.4": 586.4615, "encoder_q-layer.5": 567.1275, "encoder_q-layer.6": 529.865, "encoder_q-layer.7": 538.5724, "encoder_q-layer.8": 649.006, "encoder_q-layer.9": 593.2833, "epoch": 0.3, "inbatch_neg_score": 0.2459, "inbatch_pos_score": 0.8384, "learning_rate": 3.85e-05, "loss": 3.7181, "norm_diff": 0.0398, "norm_loss": 0.0, "num_token_doc": 66.7699, "num_token_overlap": 14.5487, "num_token_query": 37.2377, "num_token_union": 65.3575, "num_word_context": 202.25, "num_word_doc": 49.8049, "num_word_query": 27.8975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1298.0681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2458, "query_norm": 1.368, "queue_k_norm": 1.4125, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2377, "sent_len_1": 66.7699, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.0213, "stdk": 0.0475, "stdq": 0.0438, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.7127, "doc_norm": 1.4113, "encoder_q-embeddings": 1038.5443, "encoder_q-layer.0": 717.6807, "encoder_q-layer.1": 749.4849, "encoder_q-layer.10": 648.4642, "encoder_q-layer.11": 1813.7772, "encoder_q-layer.2": 887.8425, "encoder_q-layer.3": 943.6923, "encoder_q-layer.4": 982.5782, "encoder_q-layer.5": 851.2089, "encoder_q-layer.6": 941.3079, "encoder_q-layer.7": 865.4316, "encoder_q-layer.8": 799.7918, "encoder_q-layer.9": 619.4325, "epoch": 0.3, "inbatch_neg_score": 0.2332, "inbatch_pos_score": 0.812, "learning_rate": 3.844444444444444e-05, "loss": 3.7127, "norm_diff": 0.0622, "norm_loss": 0.0, "num_token_doc": 66.6658, "num_token_overlap": 14.5794, "num_token_query": 37.2013, "num_token_union": 65.2508, "num_word_context": 202.3454, "num_word_doc": 49.7524, "num_word_query": 27.8352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1496.6902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2327, "query_norm": 1.3491, "queue_k_norm": 1.4123, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2013, "sent_len_1": 66.6658, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7038, "stdk": 0.0476, "stdq": 0.0434, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7042, "doc_norm": 1.4153, "encoder_q-embeddings": 1186.0953, "encoder_q-layer.0": 785.7659, "encoder_q-layer.1": 855.4669, "encoder_q-layer.10": 604.6077, "encoder_q-layer.11": 1796.0592, "encoder_q-layer.2": 909.5228, "encoder_q-layer.3": 905.4808, "encoder_q-layer.4": 806.8779, "encoder_q-layer.5": 763.6584, "encoder_q-layer.6": 667.2294, "encoder_q-layer.7": 667.1421, "encoder_q-layer.8": 683.6514, "encoder_q-layer.9": 596.6324, "epoch": 0.3, "inbatch_neg_score": 0.24, "inbatch_pos_score": 0.853, "learning_rate": 3.838888888888889e-05, "loss": 3.7042, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.6393, "num_token_overlap": 14.5368, "num_token_query": 37.2059, "num_token_union": 65.2381, "num_word_context": 201.8313, "num_word_doc": 49.7045, "num_word_query": 27.8456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1448.3355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2397, "query_norm": 1.3809, "queue_k_norm": 1.4101, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2059, "sent_len_1": 66.6393, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9363, "stdk": 0.0478, "stdq": 0.0444, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.7157, "doc_norm": 1.4098, "encoder_q-embeddings": 3219.2234, "encoder_q-layer.0": 2428.7441, "encoder_q-layer.1": 2471.9392, "encoder_q-layer.10": 742.3472, "encoder_q-layer.11": 1887.85, "encoder_q-layer.2": 2621.0806, "encoder_q-layer.3": 2447.6921, "encoder_q-layer.4": 2570.7651, "encoder_q-layer.5": 2111.7454, "encoder_q-layer.6": 1537.8931, "encoder_q-layer.7": 1045.4969, "encoder_q-layer.8": 859.3435, "encoder_q-layer.9": 681.7684, "epoch": 0.3, "inbatch_neg_score": 0.2318, "inbatch_pos_score": 0.8652, "learning_rate": 3.8333333333333334e-05, "loss": 3.7157, "norm_diff": 0.0371, "norm_loss": 0.0, "num_token_doc": 66.7309, "num_token_overlap": 14.6417, "num_token_query": 37.5023, "num_token_union": 65.4365, "num_word_context": 202.3547, "num_word_doc": 49.762, "num_word_query": 28.0751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3163.9498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2311, "query_norm": 1.3727, "queue_k_norm": 1.4111, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5023, "sent_len_1": 66.7309, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.5075, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.7277, "doc_norm": 1.4183, "encoder_q-embeddings": 2587.1746, "encoder_q-layer.0": 1796.3582, "encoder_q-layer.1": 1934.2539, "encoder_q-layer.10": 623.4996, "encoder_q-layer.11": 1876.9789, "encoder_q-layer.2": 2189.9685, "encoder_q-layer.3": 2461.2307, "encoder_q-layer.4": 2414.4109, "encoder_q-layer.5": 2834.0437, "encoder_q-layer.6": 3158.2246, "encoder_q-layer.7": 3113.074, "encoder_q-layer.8": 1451.6056, "encoder_q-layer.9": 705.2276, "epoch": 0.3, "inbatch_neg_score": 0.2283, "inbatch_pos_score": 0.8301, "learning_rate": 3.827777777777778e-05, "loss": 3.7277, "norm_diff": 0.0573, "norm_loss": 0.0, "num_token_doc": 66.8296, "num_token_overlap": 14.569, "num_token_query": 37.4208, "num_token_union": 65.4839, "num_word_context": 202.4983, "num_word_doc": 49.8656, "num_word_query": 28.0299, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3346.3136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2274, "query_norm": 1.361, "queue_k_norm": 1.4102, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4208, "sent_len_1": 66.8296, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5775, "stdk": 0.048, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.709, "doc_norm": 1.4072, "encoder_q-embeddings": 1738.0215, "encoder_q-layer.0": 1295.1376, "encoder_q-layer.1": 1420.311, "encoder_q-layer.10": 625.3707, "encoder_q-layer.11": 1774.8171, "encoder_q-layer.2": 1566.3606, "encoder_q-layer.3": 1656.8179, "encoder_q-layer.4": 1885.6333, "encoder_q-layer.5": 1475.8849, "encoder_q-layer.6": 901.7868, "encoder_q-layer.7": 871.3349, "encoder_q-layer.8": 726.3155, "encoder_q-layer.9": 580.7562, "epoch": 0.3, "inbatch_neg_score": 0.2254, "inbatch_pos_score": 0.8315, "learning_rate": 3.8222222222222226e-05, "loss": 3.709, "norm_diff": 0.0356, "norm_loss": 0.0, "num_token_doc": 66.8201, "num_token_overlap": 14.5964, "num_token_query": 37.3334, "num_token_union": 65.3654, "num_word_context": 202.013, "num_word_doc": 49.8099, "num_word_query": 27.9418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2081.4257, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2258, "query_norm": 1.3716, "queue_k_norm": 1.4112, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3334, "sent_len_1": 66.8201, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.6488, "stdk": 0.0475, "stdq": 0.0444, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.7033, "doc_norm": 1.4149, "encoder_q-embeddings": 813.8918, "encoder_q-layer.0": 554.4271, "encoder_q-layer.1": 586.5557, "encoder_q-layer.10": 599.4298, "encoder_q-layer.11": 1740.9402, "encoder_q-layer.2": 670.8234, "encoder_q-layer.3": 743.8715, "encoder_q-layer.4": 761.9111, "encoder_q-layer.5": 696.8407, "encoder_q-layer.6": 757.7802, "encoder_q-layer.7": 728.1012, "encoder_q-layer.8": 773.2764, "encoder_q-layer.9": 591.0083, "epoch": 0.31, "inbatch_neg_score": 0.2193, "inbatch_pos_score": 0.8247, "learning_rate": 3.816666666666667e-05, "loss": 3.7033, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 14.5448, "num_token_query": 37.3094, "num_token_union": 65.436, "num_word_context": 202.1094, "num_word_doc": 49.8247, "num_word_query": 27.9145, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1288.6063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2205, "query_norm": 1.3655, "queue_k_norm": 1.4111, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3094, "sent_len_1": 66.7773, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.525, "stdk": 0.0479, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.7243, "doc_norm": 1.4109, "encoder_q-embeddings": 5596.6548, "encoder_q-layer.0": 4480.8911, "encoder_q-layer.1": 4420.8657, "encoder_q-layer.10": 649.391, "encoder_q-layer.11": 1809.4816, "encoder_q-layer.2": 3768.3335, "encoder_q-layer.3": 3741.1841, "encoder_q-layer.4": 3876.8757, "encoder_q-layer.5": 4329.2524, "encoder_q-layer.6": 4525.022, "encoder_q-layer.7": 3494.7373, "encoder_q-layer.8": 1388.2457, "encoder_q-layer.9": 713.8986, "epoch": 0.31, "inbatch_neg_score": 0.2235, "inbatch_pos_score": 0.8423, "learning_rate": 3.811111111111112e-05, "loss": 3.7243, "norm_diff": 0.0296, "norm_loss": 0.0, "num_token_doc": 66.8222, "num_token_overlap": 14.584, "num_token_query": 37.275, "num_token_union": 65.4393, "num_word_context": 202.5979, "num_word_doc": 49.8895, "num_word_query": 27.9035, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5590.6724, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2241, "query_norm": 1.3869, "queue_k_norm": 1.4094, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.275, "sent_len_1": 66.8222, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.1687, "stdk": 0.0478, "stdq": 0.0449, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.7057, "doc_norm": 1.4158, "encoder_q-embeddings": 3830.3027, "encoder_q-layer.0": 2796.4924, "encoder_q-layer.1": 3049.6597, "encoder_q-layer.10": 645.1375, "encoder_q-layer.11": 1939.9258, "encoder_q-layer.2": 2888.5437, "encoder_q-layer.3": 2958.2393, "encoder_q-layer.4": 3031.9263, "encoder_q-layer.5": 2806.4556, "encoder_q-layer.6": 2637.4863, "encoder_q-layer.7": 1476.59, "encoder_q-layer.8": 1102.2062, "encoder_q-layer.9": 650.1351, "epoch": 0.31, "inbatch_neg_score": 0.226, "inbatch_pos_score": 0.8301, "learning_rate": 3.805555555555555e-05, "loss": 3.7057, "norm_diff": 0.078, "norm_loss": 0.0, "num_token_doc": 66.8871, "num_token_overlap": 14.5905, "num_token_query": 37.3156, "num_token_union": 65.4299, "num_word_context": 202.0176, "num_word_doc": 49.8731, "num_word_query": 27.9222, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3854.6272, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2253, "query_norm": 1.3378, "queue_k_norm": 1.4096, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3156, "sent_len_1": 66.8871, "sent_len_max_0": 127.995, "sent_len_max_1": 189.2975, "stdk": 0.048, "stdq": 0.0432, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7232, "doc_norm": 1.4041, "encoder_q-embeddings": 861.8181, "encoder_q-layer.0": 581.3254, "encoder_q-layer.1": 598.8364, "encoder_q-layer.10": 639.9235, "encoder_q-layer.11": 1839.7168, "encoder_q-layer.2": 654.5598, "encoder_q-layer.3": 715.9291, "encoder_q-layer.4": 716.8879, "encoder_q-layer.5": 634.674, "encoder_q-layer.6": 622.0326, "encoder_q-layer.7": 645.9263, "encoder_q-layer.8": 676.3287, "encoder_q-layer.9": 582.7369, "epoch": 0.31, "inbatch_neg_score": 0.2196, "inbatch_pos_score": 0.8096, "learning_rate": 3.8e-05, "loss": 3.7232, "norm_diff": 0.0691, "norm_loss": 0.0, "num_token_doc": 66.9568, "num_token_overlap": 14.5993, "num_token_query": 37.3101, "num_token_union": 65.4668, "num_word_context": 202.666, "num_word_doc": 49.9706, "num_word_query": 27.9331, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1291.3175, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2189, "query_norm": 1.335, "queue_k_norm": 1.4081, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3101, "sent_len_1": 66.9568, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.1138, "stdk": 0.0475, "stdq": 0.0432, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.7147, "doc_norm": 1.408, "encoder_q-embeddings": 4888.5151, "encoder_q-layer.0": 3465.3779, "encoder_q-layer.1": 3486.1387, "encoder_q-layer.10": 1414.0812, "encoder_q-layer.11": 3783.6135, "encoder_q-layer.2": 4207.1191, "encoder_q-layer.3": 4332.3652, "encoder_q-layer.4": 4149.9043, "encoder_q-layer.5": 4193.4321, "encoder_q-layer.6": 3859.6724, "encoder_q-layer.7": 3086.6182, "encoder_q-layer.8": 1975.2324, "encoder_q-layer.9": 1335.192, "epoch": 0.31, "inbatch_neg_score": 0.2141, "inbatch_pos_score": 0.8223, "learning_rate": 3.7944444444444444e-05, "loss": 3.7147, "norm_diff": 0.072, "norm_loss": 0.0, "num_token_doc": 66.6168, "num_token_overlap": 14.5325, "num_token_query": 37.2584, "num_token_union": 65.2804, "num_word_context": 202.102, "num_word_doc": 49.7012, "num_word_query": 27.8819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5396.9769, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2136, "query_norm": 1.336, "queue_k_norm": 1.4049, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2584, "sent_len_1": 66.6168, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0225, "stdk": 0.0477, "stdq": 0.0439, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.6946, "doc_norm": 1.4122, "encoder_q-embeddings": 3175.7397, "encoder_q-layer.0": 2272.7568, "encoder_q-layer.1": 2230.9182, "encoder_q-layer.10": 1396.6921, "encoder_q-layer.11": 3763.3884, "encoder_q-layer.2": 2081.2903, "encoder_q-layer.3": 2035.3062, "encoder_q-layer.4": 2218.4888, "encoder_q-layer.5": 1963.1013, "encoder_q-layer.6": 1751.7592, "encoder_q-layer.7": 1517.2419, "encoder_q-layer.8": 1430.5856, "encoder_q-layer.9": 1211.0918, "epoch": 0.31, "inbatch_neg_score": 0.22, "inbatch_pos_score": 0.8237, "learning_rate": 3.7888888888888894e-05, "loss": 3.6946, "norm_diff": 0.0688, "norm_loss": 0.0, "num_token_doc": 66.5837, "num_token_overlap": 14.6138, "num_token_query": 37.3486, "num_token_union": 65.2444, "num_word_context": 201.9846, "num_word_doc": 49.7096, "num_word_query": 27.9516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3472.0044, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2197, "query_norm": 1.3434, "queue_k_norm": 1.4062, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3486, "sent_len_1": 66.5837, "sent_len_max_0": 127.995, "sent_len_max_1": 190.0, "stdk": 0.0479, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7193, "doc_norm": 1.4049, "encoder_q-embeddings": 1949.5212, "encoder_q-layer.0": 1353.1251, "encoder_q-layer.1": 1457.0674, "encoder_q-layer.10": 1347.6597, "encoder_q-layer.11": 3775.9343, "encoder_q-layer.2": 1624.901, "encoder_q-layer.3": 1637.2336, "encoder_q-layer.4": 1697.7672, "encoder_q-layer.5": 1599.0511, "encoder_q-layer.6": 1438.9469, "encoder_q-layer.7": 1359.3925, "encoder_q-layer.8": 1354.1313, "encoder_q-layer.9": 1202.583, "epoch": 0.31, "inbatch_neg_score": 0.2119, "inbatch_pos_score": 0.7983, "learning_rate": 3.7833333333333336e-05, "loss": 3.7193, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 66.7075, "num_token_overlap": 14.4938, "num_token_query": 37.2642, "num_token_union": 65.4101, "num_word_context": 202.2792, "num_word_doc": 49.8163, "num_word_query": 27.9328, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2769.1969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2107, "query_norm": 1.3326, "queue_k_norm": 1.4043, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2642, "sent_len_1": 66.7075, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.5137, "stdk": 0.0476, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.6786, "doc_norm": 1.4054, "encoder_q-embeddings": 1767.1852, "encoder_q-layer.0": 1162.8514, "encoder_q-layer.1": 1211.1151, "encoder_q-layer.10": 1335.9626, "encoder_q-layer.11": 3627.22, "encoder_q-layer.2": 1365.2432, "encoder_q-layer.3": 1409.7744, "encoder_q-layer.4": 1464.3921, "encoder_q-layer.5": 1458.5, "encoder_q-layer.6": 1574.0709, "encoder_q-layer.7": 1572.3235, "encoder_q-layer.8": 1450.5702, "encoder_q-layer.9": 1229.8608, "epoch": 0.31, "inbatch_neg_score": 0.2132, "inbatch_pos_score": 0.8208, "learning_rate": 3.777777777777778e-05, "loss": 3.6786, "norm_diff": 0.0576, "norm_loss": 0.0, "num_token_doc": 66.8997, "num_token_overlap": 14.648, "num_token_query": 37.3808, "num_token_union": 65.4698, "num_word_context": 202.2749, "num_word_doc": 49.9144, "num_word_query": 28.0087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2660.9025, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2129, "query_norm": 1.3478, "queue_k_norm": 1.4064, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3808, "sent_len_1": 66.8997, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8575, "stdk": 0.0477, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.6892, "doc_norm": 1.4004, "encoder_q-embeddings": 3673.8215, "encoder_q-layer.0": 2678.4951, "encoder_q-layer.1": 3003.0369, "encoder_q-layer.10": 1204.2294, "encoder_q-layer.11": 3537.679, "encoder_q-layer.2": 3498.2478, "encoder_q-layer.3": 3644.355, "encoder_q-layer.4": 3701.2109, "encoder_q-layer.5": 3687.8782, "encoder_q-layer.6": 3204.6272, "encoder_q-layer.7": 2373.627, "encoder_q-layer.8": 1521.6959, "encoder_q-layer.9": 1209.2358, "epoch": 0.31, "inbatch_neg_score": 0.2189, "inbatch_pos_score": 0.814, "learning_rate": 3.772222222222223e-05, "loss": 3.6892, "norm_diff": 0.0466, "norm_loss": 0.0, "num_token_doc": 66.9737, "num_token_overlap": 14.5146, "num_token_query": 37.0895, "num_token_union": 65.397, "num_word_context": 202.5759, "num_word_doc": 49.9166, "num_word_query": 27.7516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4516.0948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2192, "query_norm": 1.3538, "queue_k_norm": 1.4041, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.0895, "sent_len_1": 66.9737, "sent_len_max_0": 127.985, "sent_len_max_1": 191.3862, "stdk": 0.0475, "stdq": 0.0439, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6703, "doc_norm": 1.4003, "encoder_q-embeddings": 7531.502, "encoder_q-layer.0": 5288.4009, "encoder_q-layer.1": 5611.5244, "encoder_q-layer.10": 1217.6786, "encoder_q-layer.11": 3476.7893, "encoder_q-layer.2": 5858.9966, "encoder_q-layer.3": 6145.3477, "encoder_q-layer.4": 5763.1089, "encoder_q-layer.5": 5899.9126, "encoder_q-layer.6": 4732.6045, "encoder_q-layer.7": 3611.0803, "encoder_q-layer.8": 2017.8805, "encoder_q-layer.9": 1283.4192, "epoch": 0.31, "inbatch_neg_score": 0.2271, "inbatch_pos_score": 0.8232, "learning_rate": 3.766666666666667e-05, "loss": 3.6703, "norm_diff": 0.0387, "norm_loss": 0.0, "num_token_doc": 67.0713, "num_token_overlap": 14.6349, "num_token_query": 37.2845, "num_token_union": 65.5031, "num_word_context": 202.4984, "num_word_doc": 50.0095, "num_word_query": 27.906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7547.2543, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2258, "query_norm": 1.3616, "queue_k_norm": 1.4051, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2845, "sent_len_1": 67.0713, "sent_len_max_0": 127.98, "sent_len_max_1": 191.2612, "stdk": 0.0475, "stdq": 0.0443, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.7004, "doc_norm": 1.4042, "encoder_q-embeddings": 3580.3652, "encoder_q-layer.0": 2425.6348, "encoder_q-layer.1": 2860.5833, "encoder_q-layer.10": 1163.8396, "encoder_q-layer.11": 3272.7188, "encoder_q-layer.2": 3231.2131, "encoder_q-layer.3": 3694.3398, "encoder_q-layer.4": 3845.3574, "encoder_q-layer.5": 3407.9666, "encoder_q-layer.6": 2429.8037, "encoder_q-layer.7": 1906.6644, "encoder_q-layer.8": 1533.3638, "encoder_q-layer.9": 1209.4845, "epoch": 0.32, "inbatch_neg_score": 0.2286, "inbatch_pos_score": 0.8345, "learning_rate": 3.761111111111111e-05, "loss": 3.7004, "norm_diff": 0.0345, "norm_loss": 0.0, "num_token_doc": 66.8553, "num_token_overlap": 14.6102, "num_token_query": 37.4262, "num_token_union": 65.4775, "num_word_context": 202.3485, "num_word_doc": 49.8456, "num_word_query": 28.0346, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4262.8474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2292, "query_norm": 1.3697, "queue_k_norm": 1.4035, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4262, "sent_len_1": 66.8553, "sent_len_max_0": 128.0, "sent_len_max_1": 192.0337, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.6865, "doc_norm": 1.4135, "encoder_q-embeddings": 2680.8657, "encoder_q-layer.0": 1828.4304, "encoder_q-layer.1": 2187.7888, "encoder_q-layer.10": 1214.6594, "encoder_q-layer.11": 3598.4944, "encoder_q-layer.2": 2294.553, "encoder_q-layer.3": 2387.6348, "encoder_q-layer.4": 2454.8799, "encoder_q-layer.5": 2312.4482, "encoder_q-layer.6": 2457.7861, "encoder_q-layer.7": 1682.9924, "encoder_q-layer.8": 1437.356, "encoder_q-layer.9": 1190.8551, "epoch": 0.32, "inbatch_neg_score": 0.235, "inbatch_pos_score": 0.8325, "learning_rate": 3.7555555555555554e-05, "loss": 3.6865, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.7796, "num_token_overlap": 14.6499, "num_token_query": 37.5716, "num_token_union": 65.4827, "num_word_context": 202.1176, "num_word_doc": 49.7992, "num_word_query": 28.1325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3422.9104, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.235, "query_norm": 1.3532, "queue_k_norm": 1.4042, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5716, "sent_len_1": 66.7796, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6725, "stdk": 0.048, "stdq": 0.0434, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.6909, "doc_norm": 1.4005, "encoder_q-embeddings": 1303.2158, "encoder_q-layer.0": 846.7193, "encoder_q-layer.1": 902.6995, "encoder_q-layer.10": 1249.5686, "encoder_q-layer.11": 3417.7495, "encoder_q-layer.2": 976.4803, "encoder_q-layer.3": 1004.36, "encoder_q-layer.4": 1041.3684, "encoder_q-layer.5": 1037.0448, "encoder_q-layer.6": 1045.0647, "encoder_q-layer.7": 1135.6403, "encoder_q-layer.8": 1287.958, "encoder_q-layer.9": 1164.3359, "epoch": 0.32, "inbatch_neg_score": 0.2234, "inbatch_pos_score": 0.8345, "learning_rate": 3.7500000000000003e-05, "loss": 3.6909, "norm_diff": 0.0393, "norm_loss": 0.0, "num_token_doc": 66.8965, "num_token_overlap": 14.5945, "num_token_query": 37.3373, "num_token_union": 65.4946, "num_word_context": 202.6076, "num_word_doc": 49.8838, "num_word_query": 27.9438, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2257.1977, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2244, "query_norm": 1.3612, "queue_k_norm": 1.4055, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3373, "sent_len_1": 66.8965, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4812, "stdk": 0.0476, "stdq": 0.0442, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.69, "doc_norm": 1.4051, "encoder_q-embeddings": 1564.5232, "encoder_q-layer.0": 1059.126, "encoder_q-layer.1": 1150.8041, "encoder_q-layer.10": 1657.8521, "encoder_q-layer.11": 3645.8738, "encoder_q-layer.2": 1274.7833, "encoder_q-layer.3": 1302.4441, "encoder_q-layer.4": 1285.6771, "encoder_q-layer.5": 1334.4514, "encoder_q-layer.6": 1393.7515, "encoder_q-layer.7": 1443.9493, "encoder_q-layer.8": 1577.9135, "encoder_q-layer.9": 1351.3646, "epoch": 0.32, "inbatch_neg_score": 0.2243, "inbatch_pos_score": 0.8032, "learning_rate": 3.7444444444444446e-05, "loss": 3.69, "norm_diff": 0.0373, "norm_loss": 0.0, "num_token_doc": 66.6762, "num_token_overlap": 14.589, "num_token_query": 37.4008, "num_token_union": 65.3619, "num_word_context": 202.1352, "num_word_doc": 49.7717, "num_word_query": 28.0226, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2592.4431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.224, "query_norm": 1.3679, "queue_k_norm": 1.4054, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4008, "sent_len_1": 66.6762, "sent_len_max_0": 128.0, "sent_len_max_1": 187.81, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7, "doc_norm": 1.4, "encoder_q-embeddings": 6582.0356, "encoder_q-layer.0": 4733.9829, "encoder_q-layer.1": 4661.9731, "encoder_q-layer.10": 1252.3143, "encoder_q-layer.11": 3722.9878, "encoder_q-layer.2": 4960.2695, "encoder_q-layer.3": 5098.3706, "encoder_q-layer.4": 4921.8613, "encoder_q-layer.5": 4097.2695, "encoder_q-layer.6": 2949.3247, "encoder_q-layer.7": 2457.5242, "encoder_q-layer.8": 1974.8696, "encoder_q-layer.9": 1315.7686, "epoch": 0.32, "inbatch_neg_score": 0.2219, "inbatch_pos_score": 0.8179, "learning_rate": 3.738888888888889e-05, "loss": 3.7, "norm_diff": 0.0554, "norm_loss": 0.0, "num_token_doc": 66.8003, "num_token_overlap": 14.5775, "num_token_query": 37.4352, "num_token_union": 65.4897, "num_word_context": 202.3215, "num_word_doc": 49.8603, "num_word_query": 28.0485, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6456.7037, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2219, "query_norm": 1.3446, "queue_k_norm": 1.4045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4352, "sent_len_1": 66.8003, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.7512, "stdk": 0.0475, "stdq": 0.0436, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.7188, "doc_norm": 1.4084, "encoder_q-embeddings": 2203.4126, "encoder_q-layer.0": 1439.4932, "encoder_q-layer.1": 1504.8793, "encoder_q-layer.10": 1377.4248, "encoder_q-layer.11": 3773.6147, "encoder_q-layer.2": 1834.1489, "encoder_q-layer.3": 1895.7179, "encoder_q-layer.4": 1861.2017, "encoder_q-layer.5": 1890.7922, "encoder_q-layer.6": 1642.8813, "encoder_q-layer.7": 1597.4064, "encoder_q-layer.8": 1573.9697, "encoder_q-layer.9": 1271.5891, "epoch": 0.32, "inbatch_neg_score": 0.2217, "inbatch_pos_score": 0.8257, "learning_rate": 3.733333333333334e-05, "loss": 3.7188, "norm_diff": 0.0428, "norm_loss": 0.0, "num_token_doc": 66.8562, "num_token_overlap": 14.5874, "num_token_query": 37.3856, "num_token_union": 65.4989, "num_word_context": 202.4927, "num_word_doc": 49.8514, "num_word_query": 27.9947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3012.1378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2231, "query_norm": 1.3657, "queue_k_norm": 1.4035, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3856, "sent_len_1": 66.8562, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7875, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7095, "doc_norm": 1.4092, "encoder_q-embeddings": 1482.4574, "encoder_q-layer.0": 1012.9683, "encoder_q-layer.1": 1115.4619, "encoder_q-layer.10": 1310.1362, "encoder_q-layer.11": 3546.7256, "encoder_q-layer.2": 1294.1643, "encoder_q-layer.3": 1291.8673, "encoder_q-layer.4": 1287.8969, "encoder_q-layer.5": 1276.0587, "encoder_q-layer.6": 1150.3287, "encoder_q-layer.7": 1151.1982, "encoder_q-layer.8": 1339.4114, "encoder_q-layer.9": 1216.2313, "epoch": 0.32, "inbatch_neg_score": 0.2209, "inbatch_pos_score": 0.8213, "learning_rate": 3.727777777777778e-05, "loss": 3.7095, "norm_diff": 0.0737, "norm_loss": 0.0, "num_token_doc": 66.6764, "num_token_overlap": 14.4717, "num_token_query": 37.1005, "num_token_union": 65.2696, "num_word_context": 202.3521, "num_word_doc": 49.7718, "num_word_query": 27.7637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2376.0476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2197, "query_norm": 1.3355, "queue_k_norm": 1.4031, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1005, "sent_len_1": 66.6764, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8175, "stdk": 0.0478, "stdq": 0.0435, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.688, "doc_norm": 1.4035, "encoder_q-embeddings": 1686.5262, "encoder_q-layer.0": 1106.5199, "encoder_q-layer.1": 1165.4386, "encoder_q-layer.10": 1297.5411, "encoder_q-layer.11": 3277.1494, "encoder_q-layer.2": 1256.7872, "encoder_q-layer.3": 1336.9441, "encoder_q-layer.4": 1416.6274, "encoder_q-layer.5": 1523.0211, "encoder_q-layer.6": 1777.1648, "encoder_q-layer.7": 1530.2815, "encoder_q-layer.8": 1339.573, "encoder_q-layer.9": 1136.9135, "epoch": 0.32, "inbatch_neg_score": 0.219, "inbatch_pos_score": 0.8589, "learning_rate": 3.722222222222222e-05, "loss": 3.688, "norm_diff": 0.0425, "norm_loss": 0.0, "num_token_doc": 66.7763, "num_token_overlap": 14.5337, "num_token_query": 37.0944, "num_token_union": 65.2563, "num_word_context": 201.9818, "num_word_doc": 49.802, "num_word_query": 27.7595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2488.9919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2192, "query_norm": 1.3611, "queue_k_norm": 1.4049, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0944, "sent_len_1": 66.7763, "sent_len_max_0": 127.9862, "sent_len_max_1": 191.1175, "stdk": 0.0477, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6911, "doc_norm": 1.4016, "encoder_q-embeddings": 9600.9316, "encoder_q-layer.0": 8373.4111, "encoder_q-layer.1": 8254.0, "encoder_q-layer.10": 1226.0139, "encoder_q-layer.11": 3582.8054, "encoder_q-layer.2": 6536.3535, "encoder_q-layer.3": 5270.4365, "encoder_q-layer.4": 5223.1035, "encoder_q-layer.5": 4314.9785, "encoder_q-layer.6": 4181.9585, "encoder_q-layer.7": 3384.2227, "encoder_q-layer.8": 2311.207, "encoder_q-layer.9": 1228.6149, "epoch": 0.32, "inbatch_neg_score": 0.2111, "inbatch_pos_score": 0.8198, "learning_rate": 3.7166666666666664e-05, "loss": 3.6911, "norm_diff": 0.0634, "norm_loss": 0.0, "num_token_doc": 66.636, "num_token_overlap": 14.6268, "num_token_query": 37.3193, "num_token_union": 65.2661, "num_word_context": 202.0857, "num_word_doc": 49.7278, "num_word_query": 27.9381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8685.287, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2122, "query_norm": 1.3383, "queue_k_norm": 1.4045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3193, "sent_len_1": 66.636, "sent_len_max_0": 128.0, "sent_len_max_1": 188.495, "stdk": 0.0476, "stdq": 0.0438, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6729, "doc_norm": 1.408, "encoder_q-embeddings": 1615.2228, "encoder_q-layer.0": 1138.7688, "encoder_q-layer.1": 1166.1783, "encoder_q-layer.10": 1170.1599, "encoder_q-layer.11": 3292.7627, "encoder_q-layer.2": 1230.2264, "encoder_q-layer.3": 1160.0801, "encoder_q-layer.4": 1075.8276, "encoder_q-layer.5": 983.4072, "encoder_q-layer.6": 1081.3943, "encoder_q-layer.7": 1122.2358, "encoder_q-layer.8": 1294.0458, "encoder_q-layer.9": 1150.4974, "epoch": 0.32, "inbatch_neg_score": 0.2113, "inbatch_pos_score": 0.8179, "learning_rate": 3.7111111111111113e-05, "loss": 3.6729, "norm_diff": 0.0562, "norm_loss": 0.0, "num_token_doc": 66.912, "num_token_overlap": 14.6506, "num_token_query": 37.425, "num_token_union": 65.5312, "num_word_context": 202.5354, "num_word_doc": 49.9218, "num_word_query": 28.031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2340.1456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.212, "query_norm": 1.3519, "queue_k_norm": 1.405, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.425, "sent_len_1": 66.912, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7413, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.6883, "doc_norm": 1.4021, "encoder_q-embeddings": 5454.958, "encoder_q-layer.0": 3696.2759, "encoder_q-layer.1": 3972.7092, "encoder_q-layer.10": 1146.0681, "encoder_q-layer.11": 3272.3271, "encoder_q-layer.2": 4861.5317, "encoder_q-layer.3": 4307.0825, "encoder_q-layer.4": 4462.8071, "encoder_q-layer.5": 4456.1807, "encoder_q-layer.6": 2843.6348, "encoder_q-layer.7": 2384.5898, "encoder_q-layer.8": 1701.0786, "encoder_q-layer.9": 1110.5179, "epoch": 0.33, "inbatch_neg_score": 0.2157, "inbatch_pos_score": 0.8306, "learning_rate": 3.705555555555556e-05, "loss": 3.6883, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 66.7729, "num_token_overlap": 14.5984, "num_token_query": 37.536, "num_token_union": 65.458, "num_word_context": 202.6249, "num_word_doc": 49.8359, "num_word_query": 28.1273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5694.1263, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2147, "query_norm": 1.335, "queue_k_norm": 1.403, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.536, "sent_len_1": 66.7729, "sent_len_max_0": 128.0, "sent_len_max_1": 190.26, "stdk": 0.0477, "stdq": 0.0434, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.6703, "doc_norm": 1.4075, "encoder_q-embeddings": 3237.6501, "encoder_q-layer.0": 2395.3579, "encoder_q-layer.1": 2912.0925, "encoder_q-layer.10": 1229.0735, "encoder_q-layer.11": 3225.7505, "encoder_q-layer.2": 3476.0649, "encoder_q-layer.3": 3012.4236, "encoder_q-layer.4": 2825.3354, "encoder_q-layer.5": 2756.2937, "encoder_q-layer.6": 2595.5134, "encoder_q-layer.7": 1665.3138, "encoder_q-layer.8": 1509.4423, "encoder_q-layer.9": 1157.7169, "epoch": 0.33, "inbatch_neg_score": 0.2153, "inbatch_pos_score": 0.814, "learning_rate": 3.7e-05, "loss": 3.6703, "norm_diff": 0.0638, "norm_loss": 0.0, "num_token_doc": 67.1349, "num_token_overlap": 14.5785, "num_token_query": 37.1467, "num_token_union": 65.4446, "num_word_context": 202.8193, "num_word_doc": 50.1011, "num_word_query": 27.773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3963.0713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2148, "query_norm": 1.3436, "queue_k_norm": 1.4051, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1467, "sent_len_1": 67.1349, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.52, "stdk": 0.0479, "stdq": 0.0435, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.6664, "doc_norm": 1.4048, "encoder_q-embeddings": 3875.512, "encoder_q-layer.0": 2629.8677, "encoder_q-layer.1": 2776.897, "encoder_q-layer.10": 1355.9999, "encoder_q-layer.11": 3395.8154, "encoder_q-layer.2": 3393.8716, "encoder_q-layer.3": 3433.686, "encoder_q-layer.4": 3499.9978, "encoder_q-layer.5": 4115.7437, "encoder_q-layer.6": 4047.5325, "encoder_q-layer.7": 2776.5825, "encoder_q-layer.8": 2353.2256, "encoder_q-layer.9": 1351.2699, "epoch": 0.33, "inbatch_neg_score": 0.2134, "inbatch_pos_score": 0.8105, "learning_rate": 3.694444444444445e-05, "loss": 3.6664, "norm_diff": 0.0523, "norm_loss": 0.0, "num_token_doc": 66.8319, "num_token_overlap": 14.6582, "num_token_query": 37.4718, "num_token_union": 65.409, "num_word_context": 202.3992, "num_word_doc": 49.8292, "num_word_query": 28.0487, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4749.3151, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2134, "query_norm": 1.3525, "queue_k_norm": 1.4055, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4718, "sent_len_1": 66.8319, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.87, "stdk": 0.0478, "stdq": 0.0435, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.6664, "doc_norm": 1.3973, "encoder_q-embeddings": 8471.8994, "encoder_q-layer.0": 6760.3999, "encoder_q-layer.1": 7024.6616, "encoder_q-layer.10": 1317.006, "encoder_q-layer.11": 3444.6753, "encoder_q-layer.2": 8192.3203, "encoder_q-layer.3": 7377.2695, "encoder_q-layer.4": 7240.0063, "encoder_q-layer.5": 6137.4062, "encoder_q-layer.6": 5402.2949, "encoder_q-layer.7": 3865.002, "encoder_q-layer.8": 2523.957, "encoder_q-layer.9": 1262.8568, "epoch": 0.33, "inbatch_neg_score": 0.2133, "inbatch_pos_score": 0.7935, "learning_rate": 3.688888888888889e-05, "loss": 3.6664, "norm_diff": 0.048, "norm_loss": 0.0, "num_token_doc": 66.8846, "num_token_overlap": 14.5741, "num_token_query": 37.3075, "num_token_union": 65.5007, "num_word_context": 202.6391, "num_word_doc": 49.8753, "num_word_query": 27.9633, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9116.5259, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2117, "query_norm": 1.3493, "queue_k_norm": 1.4059, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3075, "sent_len_1": 66.8846, "sent_len_max_0": 127.99, "sent_len_max_1": 190.3475, "stdk": 0.0475, "stdq": 0.0434, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.6779, "doc_norm": 1.4066, "encoder_q-embeddings": 3588.447, "encoder_q-layer.0": 2369.8779, "encoder_q-layer.1": 2860.457, "encoder_q-layer.10": 2359.0481, "encoder_q-layer.11": 6623.4541, "encoder_q-layer.2": 3345.5549, "encoder_q-layer.3": 3682.0498, "encoder_q-layer.4": 3592.4116, "encoder_q-layer.5": 3162.3911, "encoder_q-layer.6": 2834.189, "encoder_q-layer.7": 2095.6836, "encoder_q-layer.8": 2480.9944, "encoder_q-layer.9": 2298.4731, "epoch": 0.33, "inbatch_neg_score": 0.2096, "inbatch_pos_score": 0.8115, "learning_rate": 3.683333333333334e-05, "loss": 3.6779, "norm_diff": 0.0419, "norm_loss": 0.0, "num_token_doc": 66.8352, "num_token_overlap": 14.5596, "num_token_query": 37.278, "num_token_union": 65.4263, "num_word_context": 202.3797, "num_word_doc": 49.879, "num_word_query": 27.9186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5240.9625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2098, "query_norm": 1.3648, "queue_k_norm": 1.4028, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.278, "sent_len_1": 66.8352, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.985, "stdk": 0.0478, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.6702, "doc_norm": 1.3998, "encoder_q-embeddings": 10833.3682, "encoder_q-layer.0": 8690.1816, "encoder_q-layer.1": 9216.5732, "encoder_q-layer.10": 2590.7332, "encoder_q-layer.11": 6716.9619, "encoder_q-layer.2": 10325.4141, "encoder_q-layer.3": 10908.3818, "encoder_q-layer.4": 11451.2666, "encoder_q-layer.5": 11810.249, "encoder_q-layer.6": 8499.1055, "encoder_q-layer.7": 3214.7397, "encoder_q-layer.8": 2967.1011, "encoder_q-layer.9": 2532.9741, "epoch": 0.33, "inbatch_neg_score": 0.212, "inbatch_pos_score": 0.8027, "learning_rate": 3.677777777777778e-05, "loss": 3.6702, "norm_diff": 0.0465, "norm_loss": 0.0, "num_token_doc": 66.4914, "num_token_overlap": 14.5614, "num_token_query": 37.2367, "num_token_union": 65.1931, "num_word_context": 201.7585, "num_word_doc": 49.6201, "num_word_query": 27.8821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13044.6241, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2114, "query_norm": 1.3533, "queue_k_norm": 1.4034, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2367, "sent_len_1": 66.4914, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7875, "stdk": 0.0476, "stdq": 0.0435, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.6588, "doc_norm": 1.4069, "encoder_q-embeddings": 1327.4686, "encoder_q-layer.0": 886.5913, "encoder_q-layer.1": 943.8399, "encoder_q-layer.10": 1252.9805, "encoder_q-layer.11": 3352.0474, "encoder_q-layer.2": 1028.7885, "encoder_q-layer.3": 1068.6016, "encoder_q-layer.4": 1076.3335, "encoder_q-layer.5": 1041.1935, "encoder_q-layer.6": 1269.2843, "encoder_q-layer.7": 1139.3322, "encoder_q-layer.8": 1330.0143, "encoder_q-layer.9": 1228.6132, "epoch": 0.33, "inbatch_neg_score": 0.2109, "inbatch_pos_score": 0.812, "learning_rate": 3.672222222222222e-05, "loss": 3.6588, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.8995, "num_token_overlap": 14.6715, "num_token_query": 37.4625, "num_token_union": 65.4386, "num_word_context": 201.9931, "num_word_doc": 49.9175, "num_word_query": 28.059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2264.1788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2111, "query_norm": 1.3575, "queue_k_norm": 1.4038, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4625, "sent_len_1": 66.8995, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.8688, "stdk": 0.0479, "stdq": 0.0436, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6566, "doc_norm": 1.4072, "encoder_q-embeddings": 3192.0393, "encoder_q-layer.0": 2107.0903, "encoder_q-layer.1": 2127.3167, "encoder_q-layer.10": 1238.7509, "encoder_q-layer.11": 3367.5334, "encoder_q-layer.2": 2362.4919, "encoder_q-layer.3": 2521.2231, "encoder_q-layer.4": 2828.761, "encoder_q-layer.5": 2469.5337, "encoder_q-layer.6": 2220.6096, "encoder_q-layer.7": 1845.8098, "encoder_q-layer.8": 1584.9906, "encoder_q-layer.9": 1272.2471, "epoch": 0.33, "inbatch_neg_score": 0.2077, "inbatch_pos_score": 0.8101, "learning_rate": 3.6666666666666666e-05, "loss": 3.6566, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 66.8521, "num_token_overlap": 14.587, "num_token_query": 37.3445, "num_token_union": 65.4993, "num_word_context": 202.1515, "num_word_doc": 49.9281, "num_word_query": 27.9775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3597.594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2068, "query_norm": 1.3625, "queue_k_norm": 1.406, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3445, "sent_len_1": 66.8521, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8887, "stdk": 0.0479, "stdq": 0.0443, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.6815, "doc_norm": 1.4038, "encoder_q-embeddings": 1591.375, "encoder_q-layer.0": 1074.0431, "encoder_q-layer.1": 1163.4822, "encoder_q-layer.10": 1216.3, "encoder_q-layer.11": 3180.9595, "encoder_q-layer.2": 1346.9773, "encoder_q-layer.3": 1457.5508, "encoder_q-layer.4": 1484.6125, "encoder_q-layer.5": 1445.9559, "encoder_q-layer.6": 1378.2532, "encoder_q-layer.7": 1179.8348, "encoder_q-layer.8": 1226.6997, "encoder_q-layer.9": 1129.9601, "epoch": 0.33, "inbatch_neg_score": 0.2114, "inbatch_pos_score": 0.8003, "learning_rate": 3.6611111111111115e-05, "loss": 3.6815, "norm_diff": 0.0623, "norm_loss": 0.0, "num_token_doc": 66.8582, "num_token_overlap": 14.5339, "num_token_query": 37.2111, "num_token_union": 65.3451, "num_word_context": 201.9925, "num_word_doc": 49.8338, "num_word_query": 27.8672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2348.7347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2112, "query_norm": 1.3415, "queue_k_norm": 1.4051, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2111, "sent_len_1": 66.8582, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.9313, "stdk": 0.0478, "stdq": 0.0436, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.6675, "doc_norm": 1.4121, "encoder_q-embeddings": 1237.4905, "encoder_q-layer.0": 840.8757, "encoder_q-layer.1": 874.0894, "encoder_q-layer.10": 1257.3865, "encoder_q-layer.11": 3465.676, "encoder_q-layer.2": 959.0269, "encoder_q-layer.3": 1010.8696, "encoder_q-layer.4": 1054.9528, "encoder_q-layer.5": 951.342, "encoder_q-layer.6": 1047.7789, "encoder_q-layer.7": 1165.3483, "encoder_q-layer.8": 1353.0996, "encoder_q-layer.9": 1208.0677, "epoch": 0.33, "inbatch_neg_score": 0.2079, "inbatch_pos_score": 0.8286, "learning_rate": 3.655555555555556e-05, "loss": 3.6675, "norm_diff": 0.0891, "norm_loss": 0.0, "num_token_doc": 66.7238, "num_token_overlap": 14.5572, "num_token_query": 37.147, "num_token_union": 65.2793, "num_word_context": 202.1884, "num_word_doc": 49.7898, "num_word_query": 27.8051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2203.9986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.209, "query_norm": 1.3229, "queue_k_norm": 1.4062, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.147, "sent_len_1": 66.7238, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9125, "stdk": 0.0481, "stdq": 0.0432, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.6613, "doc_norm": 1.4056, "encoder_q-embeddings": 10611.5898, "encoder_q-layer.0": 8263.7656, "encoder_q-layer.1": 10369.085, "encoder_q-layer.10": 1206.8896, "encoder_q-layer.11": 3255.6548, "encoder_q-layer.2": 11921.0322, "encoder_q-layer.3": 12662.5908, "encoder_q-layer.4": 10914.9268, "encoder_q-layer.5": 7966.1265, "encoder_q-layer.6": 8217.8311, "encoder_q-layer.7": 5398.6602, "encoder_q-layer.8": 3041.1021, "encoder_q-layer.9": 1290.094, "epoch": 0.33, "inbatch_neg_score": 0.2133, "inbatch_pos_score": 0.8228, "learning_rate": 3.65e-05, "loss": 3.6613, "norm_diff": 0.0624, "norm_loss": 0.0, "num_token_doc": 66.9526, "num_token_overlap": 14.5799, "num_token_query": 37.1748, "num_token_union": 65.382, "num_word_context": 202.0099, "num_word_doc": 49.9773, "num_word_query": 27.8244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12884.4427, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2142, "query_norm": 1.3432, "queue_k_norm": 1.4054, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1748, "sent_len_1": 66.9526, "sent_len_max_0": 127.9988, "sent_len_max_1": 186.9775, "stdk": 0.0479, "stdq": 0.044, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.6684, "doc_norm": 1.403, "encoder_q-embeddings": 3723.8772, "encoder_q-layer.0": 2730.1973, "encoder_q-layer.1": 2577.7825, "encoder_q-layer.10": 1226.7603, "encoder_q-layer.11": 3174.5479, "encoder_q-layer.2": 2574.0774, "encoder_q-layer.3": 2195.2117, "encoder_q-layer.4": 2100.2693, "encoder_q-layer.5": 1563.0682, "encoder_q-layer.6": 1418.3438, "encoder_q-layer.7": 1465.2068, "encoder_q-layer.8": 1410.822, "encoder_q-layer.9": 1208.8511, "epoch": 0.34, "inbatch_neg_score": 0.2134, "inbatch_pos_score": 0.8496, "learning_rate": 3.644444444444445e-05, "loss": 3.6684, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.7994, "num_token_overlap": 14.5593, "num_token_query": 37.3197, "num_token_union": 65.3752, "num_word_context": 202.2951, "num_word_doc": 49.804, "num_word_query": 27.9062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3556.4375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2129, "query_norm": 1.3551, "queue_k_norm": 1.4041, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3197, "sent_len_1": 66.7994, "sent_len_max_0": 128.0, "sent_len_max_1": 190.97, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.6372, "doc_norm": 1.4094, "encoder_q-embeddings": 1173.3516, "encoder_q-layer.0": 763.8302, "encoder_q-layer.1": 810.7789, "encoder_q-layer.10": 1314.3406, "encoder_q-layer.11": 3059.6868, "encoder_q-layer.2": 878.4965, "encoder_q-layer.3": 885.5356, "encoder_q-layer.4": 937.5875, "encoder_q-layer.5": 924.2874, "encoder_q-layer.6": 1047.3319, "encoder_q-layer.7": 1156.5737, "encoder_q-layer.8": 1296.7711, "encoder_q-layer.9": 1163.2141, "epoch": 0.34, "inbatch_neg_score": 0.214, "inbatch_pos_score": 0.8457, "learning_rate": 3.638888888888889e-05, "loss": 3.6372, "norm_diff": 0.0541, "norm_loss": 0.0, "num_token_doc": 66.8529, "num_token_overlap": 14.6255, "num_token_query": 37.3476, "num_token_union": 65.3981, "num_word_context": 202.4684, "num_word_doc": 49.8689, "num_word_query": 27.9796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2032.7212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2133, "query_norm": 1.3553, "queue_k_norm": 1.407, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3476, "sent_len_1": 66.8529, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1612, "stdk": 0.048, "stdq": 0.0444, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.6578, "doc_norm": 1.4016, "encoder_q-embeddings": 639.8954, "encoder_q-layer.0": 426.5172, "encoder_q-layer.1": 440.564, "encoder_q-layer.10": 630.8223, "encoder_q-layer.11": 1600.9583, "encoder_q-layer.2": 484.4184, "encoder_q-layer.3": 518.9533, "encoder_q-layer.4": 536.8326, "encoder_q-layer.5": 540.2455, "encoder_q-layer.6": 592.1652, "encoder_q-layer.7": 609.8259, "encoder_q-layer.8": 695.7598, "encoder_q-layer.9": 633.0052, "epoch": 0.34, "inbatch_neg_score": 0.2119, "inbatch_pos_score": 0.8291, "learning_rate": 3.633333333333333e-05, "loss": 3.6578, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 67.0228, "num_token_overlap": 14.6301, "num_token_query": 37.545, "num_token_union": 65.6525, "num_word_context": 202.5548, "num_word_doc": 49.9745, "num_word_query": 28.0841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1086.9646, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2125, "query_norm": 1.3641, "queue_k_norm": 1.4065, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.545, "sent_len_1": 67.0228, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.5387, "stdk": 0.0477, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.6711, "doc_norm": 1.404, "encoder_q-embeddings": 1614.791, "encoder_q-layer.0": 1186.1385, "encoder_q-layer.1": 1278.7103, "encoder_q-layer.10": 653.5501, "encoder_q-layer.11": 1596.895, "encoder_q-layer.2": 1629.9497, "encoder_q-layer.3": 1681.996, "encoder_q-layer.4": 1654.8832, "encoder_q-layer.5": 1476.5062, "encoder_q-layer.6": 1173.4777, "encoder_q-layer.7": 824.825, "encoder_q-layer.8": 761.0902, "encoder_q-layer.9": 617.109, "epoch": 0.34, "inbatch_neg_score": 0.2061, "inbatch_pos_score": 0.8101, "learning_rate": 3.6277777777777776e-05, "loss": 3.6711, "norm_diff": 0.06, "norm_loss": 0.0, "num_token_doc": 66.7267, "num_token_overlap": 14.5057, "num_token_query": 37.17, "num_token_union": 65.2825, "num_word_context": 201.8215, "num_word_doc": 49.815, "num_word_query": 27.8112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1975.962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2063, "query_norm": 1.344, "queue_k_norm": 1.4048, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.17, "sent_len_1": 66.7267, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3963, "stdk": 0.0478, "stdq": 0.0441, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.6688, "doc_norm": 1.4069, "encoder_q-embeddings": 857.6561, "encoder_q-layer.0": 617.6218, "encoder_q-layer.1": 676.8111, "encoder_q-layer.10": 615.5024, "encoder_q-layer.11": 1631.0342, "encoder_q-layer.2": 751.1933, "encoder_q-layer.3": 809.0639, "encoder_q-layer.4": 838.3723, "encoder_q-layer.5": 900.5177, "encoder_q-layer.6": 604.4672, "encoder_q-layer.7": 609.6819, "encoder_q-layer.8": 653.5165, "encoder_q-layer.9": 562.9247, "epoch": 0.34, "inbatch_neg_score": 0.2022, "inbatch_pos_score": 0.8013, "learning_rate": 3.6222222222222225e-05, "loss": 3.6688, "norm_diff": 0.0714, "norm_loss": 0.0, "num_token_doc": 66.598, "num_token_overlap": 14.6078, "num_token_query": 37.3864, "num_token_union": 65.3019, "num_word_context": 202.4363, "num_word_doc": 49.7313, "num_word_query": 28.0095, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1271.8852, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2026, "query_norm": 1.3356, "queue_k_norm": 1.4044, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3864, "sent_len_1": 66.598, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3388, "stdk": 0.0479, "stdq": 0.0437, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.6504, "doc_norm": 1.4001, "encoder_q-embeddings": 566.6652, "encoder_q-layer.0": 386.1727, "encoder_q-layer.1": 396.6572, "encoder_q-layer.10": 606.692, "encoder_q-layer.11": 1542.4382, "encoder_q-layer.2": 453.2229, "encoder_q-layer.3": 443.6959, "encoder_q-layer.4": 451.7534, "encoder_q-layer.5": 445.9984, "encoder_q-layer.6": 489.0601, "encoder_q-layer.7": 532.7774, "encoder_q-layer.8": 647.6957, "encoder_q-layer.9": 564.2447, "epoch": 0.34, "inbatch_neg_score": 0.2003, "inbatch_pos_score": 0.8047, "learning_rate": 3.6166666666666674e-05, "loss": 3.6504, "norm_diff": 0.0577, "norm_loss": 0.0, "num_token_doc": 66.861, "num_token_overlap": 14.605, "num_token_query": 37.3339, "num_token_union": 65.4624, "num_word_context": 202.369, "num_word_doc": 49.8559, "num_word_query": 27.9441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1000.2396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2002, "query_norm": 1.3423, "queue_k_norm": 1.4061, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3339, "sent_len_1": 66.861, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0238, "stdk": 0.0477, "stdq": 0.0439, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6424, "doc_norm": 1.4021, "encoder_q-embeddings": 773.1193, "encoder_q-layer.0": 526.7139, "encoder_q-layer.1": 561.5141, "encoder_q-layer.10": 604.4349, "encoder_q-layer.11": 1613.0758, "encoder_q-layer.2": 620.8583, "encoder_q-layer.3": 651.4897, "encoder_q-layer.4": 661.329, "encoder_q-layer.5": 603.2704, "encoder_q-layer.6": 622.9442, "encoder_q-layer.7": 669.711, "encoder_q-layer.8": 651.4122, "encoder_q-layer.9": 582.8272, "epoch": 0.34, "inbatch_neg_score": 0.1974, "inbatch_pos_score": 0.793, "learning_rate": 3.611111111111111e-05, "loss": 3.6424, "norm_diff": 0.0621, "norm_loss": 0.0, "num_token_doc": 66.8843, "num_token_overlap": 14.643, "num_token_query": 37.4774, "num_token_union": 65.4704, "num_word_context": 202.3761, "num_word_doc": 49.9502, "num_word_query": 28.0764, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1155.0735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1963, "query_norm": 1.34, "queue_k_norm": 1.4064, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4774, "sent_len_1": 66.8843, "sent_len_max_0": 127.995, "sent_len_max_1": 188.4663, "stdk": 0.0477, "stdq": 0.044, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.6404, "doc_norm": 1.4068, "encoder_q-embeddings": 1406.6805, "encoder_q-layer.0": 1033.4408, "encoder_q-layer.1": 1122.0181, "encoder_q-layer.10": 731.8845, "encoder_q-layer.11": 1656.4537, "encoder_q-layer.2": 1287.944, "encoder_q-layer.3": 1359.3729, "encoder_q-layer.4": 1390.7284, "encoder_q-layer.5": 1396.2301, "encoder_q-layer.6": 1550.2469, "encoder_q-layer.7": 1290.4916, "encoder_q-layer.8": 934.8298, "encoder_q-layer.9": 695.4359, "epoch": 0.34, "inbatch_neg_score": 0.1965, "inbatch_pos_score": 0.8008, "learning_rate": 3.605555555555556e-05, "loss": 3.6404, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.6904, "num_token_overlap": 14.5353, "num_token_query": 37.1632, "num_token_union": 65.2279, "num_word_context": 201.9911, "num_word_doc": 49.7576, "num_word_query": 27.8338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1881.3582, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1962, "query_norm": 1.3585, "queue_k_norm": 1.4043, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1632, "sent_len_1": 66.6904, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6188, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.6325, "doc_norm": 1.4156, "encoder_q-embeddings": 626.0065, "encoder_q-layer.0": 415.2842, "encoder_q-layer.1": 431.0562, "encoder_q-layer.10": 599.595, "encoder_q-layer.11": 1518.9911, "encoder_q-layer.2": 485.5896, "encoder_q-layer.3": 499.1096, "encoder_q-layer.4": 520.4408, "encoder_q-layer.5": 506.1533, "encoder_q-layer.6": 553.2642, "encoder_q-layer.7": 561.5853, "encoder_q-layer.8": 641.3599, "encoder_q-layer.9": 570.6873, "epoch": 0.34, "inbatch_neg_score": 0.1965, "inbatch_pos_score": 0.8154, "learning_rate": 3.6e-05, "loss": 3.6325, "norm_diff": 0.0818, "norm_loss": 0.0, "num_token_doc": 66.8152, "num_token_overlap": 14.5867, "num_token_query": 37.1571, "num_token_union": 65.3294, "num_word_context": 202.2939, "num_word_doc": 49.8184, "num_word_query": 27.7933, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1022.4101, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1957, "query_norm": 1.3338, "queue_k_norm": 1.4031, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1571, "sent_len_1": 66.8152, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8462, "stdk": 0.0483, "stdq": 0.0434, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.6338, "doc_norm": 1.4022, "encoder_q-embeddings": 670.3145, "encoder_q-layer.0": 441.9733, "encoder_q-layer.1": 460.0264, "encoder_q-layer.10": 607.4645, "encoder_q-layer.11": 1519.0485, "encoder_q-layer.2": 505.0644, "encoder_q-layer.3": 510.4407, "encoder_q-layer.4": 501.2731, "encoder_q-layer.5": 504.3518, "encoder_q-layer.6": 534.7207, "encoder_q-layer.7": 562.6567, "encoder_q-layer.8": 618.1493, "encoder_q-layer.9": 578.1917, "epoch": 0.34, "inbatch_neg_score": 0.1934, "inbatch_pos_score": 0.8047, "learning_rate": 3.594444444444445e-05, "loss": 3.6338, "norm_diff": 0.0634, "norm_loss": 0.0, "num_token_doc": 66.9551, "num_token_overlap": 14.6431, "num_token_query": 37.3224, "num_token_union": 65.4746, "num_word_context": 202.2788, "num_word_doc": 49.9921, "num_word_query": 27.9313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1039.7977, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1926, "query_norm": 1.3388, "queue_k_norm": 1.4037, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3224, "sent_len_1": 66.9551, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5387, "stdk": 0.0478, "stdq": 0.0438, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.6494, "doc_norm": 1.3989, "encoder_q-embeddings": 583.7878, "encoder_q-layer.0": 387.6033, "encoder_q-layer.1": 407.7927, "encoder_q-layer.10": 561.0549, "encoder_q-layer.11": 1488.3899, "encoder_q-layer.2": 443.9609, "encoder_q-layer.3": 447.1228, "encoder_q-layer.4": 465.174, "encoder_q-layer.5": 455.1272, "encoder_q-layer.6": 491.2635, "encoder_q-layer.7": 518.0351, "encoder_q-layer.8": 623.0973, "encoder_q-layer.9": 572.8282, "epoch": 0.35, "inbatch_neg_score": 0.1953, "inbatch_pos_score": 0.8062, "learning_rate": 3.5888888888888886e-05, "loss": 3.6494, "norm_diff": 0.0532, "norm_loss": 0.0, "num_token_doc": 66.637, "num_token_overlap": 14.5723, "num_token_query": 37.348, "num_token_union": 65.3857, "num_word_context": 202.3193, "num_word_doc": 49.7381, "num_word_query": 27.9528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 982.4183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1952, "query_norm": 1.3457, "queue_k_norm": 1.4005, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.348, "sent_len_1": 66.637, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0513, "stdk": 0.0477, "stdq": 0.0437, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.6212, "doc_norm": 1.403, "encoder_q-embeddings": 752.7754, "encoder_q-layer.0": 476.9884, "encoder_q-layer.1": 486.8943, "encoder_q-layer.10": 608.1376, "encoder_q-layer.11": 1589.3374, "encoder_q-layer.2": 530.0728, "encoder_q-layer.3": 511.5408, "encoder_q-layer.4": 492.691, "encoder_q-layer.5": 502.6177, "encoder_q-layer.6": 535.733, "encoder_q-layer.7": 591.951, "encoder_q-layer.8": 667.3809, "encoder_q-layer.9": 580.4317, "epoch": 0.35, "inbatch_neg_score": 0.2021, "inbatch_pos_score": 0.8223, "learning_rate": 3.5833333333333335e-05, "loss": 3.6212, "norm_diff": 0.0281, "norm_loss": 0.0, "num_token_doc": 66.7653, "num_token_overlap": 14.6241, "num_token_query": 37.5073, "num_token_union": 65.4442, "num_word_context": 202.093, "num_word_doc": 49.7682, "num_word_query": 28.0737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1099.9604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2009, "query_norm": 1.375, "queue_k_norm": 1.402, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5073, "sent_len_1": 66.7653, "sent_len_max_0": 128.0, "sent_len_max_1": 191.035, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.6377, "doc_norm": 1.399, "encoder_q-embeddings": 749.0449, "encoder_q-layer.0": 518.2416, "encoder_q-layer.1": 551.8936, "encoder_q-layer.10": 648.6122, "encoder_q-layer.11": 1546.6283, "encoder_q-layer.2": 586.5308, "encoder_q-layer.3": 602.5181, "encoder_q-layer.4": 593.0959, "encoder_q-layer.5": 551.0101, "encoder_q-layer.6": 547.8604, "encoder_q-layer.7": 577.5296, "encoder_q-layer.8": 684.7258, "encoder_q-layer.9": 592.0015, "epoch": 0.35, "inbatch_neg_score": 0.2092, "inbatch_pos_score": 0.8232, "learning_rate": 3.577777777777778e-05, "loss": 3.6377, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.6968, "num_token_overlap": 14.5188, "num_token_query": 37.2858, "num_token_union": 65.3544, "num_word_context": 202.202, "num_word_doc": 49.7627, "num_word_query": 27.9237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1086.31, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2092, "query_norm": 1.3657, "queue_k_norm": 1.4013, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2858, "sent_len_1": 66.6968, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7312, "stdk": 0.0478, "stdq": 0.0443, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.6281, "doc_norm": 1.4038, "encoder_q-embeddings": 1310.6859, "encoder_q-layer.0": 876.91, "encoder_q-layer.1": 975.1533, "encoder_q-layer.10": 599.8449, "encoder_q-layer.11": 1476.3514, "encoder_q-layer.2": 1087.252, "encoder_q-layer.3": 1173.173, "encoder_q-layer.4": 1290.604, "encoder_q-layer.5": 1307.4097, "encoder_q-layer.6": 905.697, "encoder_q-layer.7": 715.7025, "encoder_q-layer.8": 626.7057, "encoder_q-layer.9": 581.1668, "epoch": 0.35, "inbatch_neg_score": 0.2066, "inbatch_pos_score": 0.8179, "learning_rate": 3.5722222222222226e-05, "loss": 3.6281, "norm_diff": 0.0695, "norm_loss": 0.0, "num_token_doc": 66.9135, "num_token_overlap": 14.5581, "num_token_query": 37.3556, "num_token_union": 65.5117, "num_word_context": 202.4558, "num_word_doc": 49.924, "num_word_query": 27.9698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1578.3392, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2065, "query_norm": 1.3369, "queue_k_norm": 1.403, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3556, "sent_len_1": 66.9135, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5775, "stdk": 0.0479, "stdq": 0.0435, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.618, "doc_norm": 1.4012, "encoder_q-embeddings": 733.9279, "encoder_q-layer.0": 530.1454, "encoder_q-layer.1": 550.4583, "encoder_q-layer.10": 624.9819, "encoder_q-layer.11": 1603.5646, "encoder_q-layer.2": 666.9709, "encoder_q-layer.3": 655.1841, "encoder_q-layer.4": 645.2804, "encoder_q-layer.5": 583.4527, "encoder_q-layer.6": 544.6045, "encoder_q-layer.7": 572.8852, "encoder_q-layer.8": 621.5955, "encoder_q-layer.9": 593.0174, "epoch": 0.35, "inbatch_neg_score": 0.2092, "inbatch_pos_score": 0.8179, "learning_rate": 3.566666666666667e-05, "loss": 3.618, "norm_diff": 0.0562, "norm_loss": 0.0, "num_token_doc": 66.9411, "num_token_overlap": 14.6354, "num_token_query": 37.2752, "num_token_union": 65.4442, "num_word_context": 202.288, "num_word_doc": 49.9773, "num_word_query": 27.9044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1130.1164, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2079, "query_norm": 1.345, "queue_k_norm": 1.4034, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2752, "sent_len_1": 66.9411, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4425, "stdk": 0.0478, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.6457, "doc_norm": 1.4003, "encoder_q-embeddings": 633.2102, "encoder_q-layer.0": 413.9797, "encoder_q-layer.1": 419.258, "encoder_q-layer.10": 583.9086, "encoder_q-layer.11": 1622.7688, "encoder_q-layer.2": 472.6656, "encoder_q-layer.3": 464.023, "encoder_q-layer.4": 466.3195, "encoder_q-layer.5": 449.0764, "encoder_q-layer.6": 497.7567, "encoder_q-layer.7": 548.5489, "encoder_q-layer.8": 640.5843, "encoder_q-layer.9": 568.4637, "epoch": 0.35, "inbatch_neg_score": 0.2055, "inbatch_pos_score": 0.813, "learning_rate": 3.561111111111111e-05, "loss": 3.6457, "norm_diff": 0.0611, "norm_loss": 0.0, "num_token_doc": 66.8208, "num_token_overlap": 14.6049, "num_token_query": 37.352, "num_token_union": 65.3815, "num_word_context": 202.4654, "num_word_doc": 49.858, "num_word_query": 27.9648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1025.3165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2051, "query_norm": 1.3392, "queue_k_norm": 1.4028, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.352, "sent_len_1": 66.8208, "sent_len_max_0": 128.0, "sent_len_max_1": 188.18, "stdk": 0.0478, "stdq": 0.0441, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.6453, "doc_norm": 1.4061, "encoder_q-embeddings": 652.3439, "encoder_q-layer.0": 436.0117, "encoder_q-layer.1": 457.7603, "encoder_q-layer.10": 552.1681, "encoder_q-layer.11": 1502.593, "encoder_q-layer.2": 496.9574, "encoder_q-layer.3": 499.5649, "encoder_q-layer.4": 485.8005, "encoder_q-layer.5": 493.3333, "encoder_q-layer.6": 547.1791, "encoder_q-layer.7": 596.2637, "encoder_q-layer.8": 617.3293, "encoder_q-layer.9": 571.3417, "epoch": 0.35, "inbatch_neg_score": 0.205, "inbatch_pos_score": 0.8315, "learning_rate": 3.555555555555556e-05, "loss": 3.6453, "norm_diff": 0.0559, "norm_loss": 0.0, "num_token_doc": 66.7993, "num_token_overlap": 14.6231, "num_token_query": 37.4509, "num_token_union": 65.4302, "num_word_context": 202.3206, "num_word_doc": 49.8628, "num_word_query": 28.0564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1012.8316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2046, "query_norm": 1.3502, "queue_k_norm": 1.4031, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4509, "sent_len_1": 66.7993, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6037, "stdk": 0.048, "stdq": 0.0446, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.6222, "doc_norm": 1.4067, "encoder_q-embeddings": 586.4046, "encoder_q-layer.0": 385.5782, "encoder_q-layer.1": 400.2086, "encoder_q-layer.10": 638.2382, "encoder_q-layer.11": 1510.9893, "encoder_q-layer.2": 428.5193, "encoder_q-layer.3": 459.6541, "encoder_q-layer.4": 470.3406, "encoder_q-layer.5": 465.1267, "encoder_q-layer.6": 521.694, "encoder_q-layer.7": 604.4224, "encoder_q-layer.8": 677.0557, "encoder_q-layer.9": 592.4086, "epoch": 0.35, "inbatch_neg_score": 0.1953, "inbatch_pos_score": 0.8213, "learning_rate": 3.55e-05, "loss": 3.6222, "norm_diff": 0.0738, "norm_loss": 0.0, "num_token_doc": 66.9551, "num_token_overlap": 14.6593, "num_token_query": 37.6306, "num_token_union": 65.6284, "num_word_context": 202.7818, "num_word_doc": 50.0078, "num_word_query": 28.2086, "postclip_grad_norm": 1.0, "preclip_grad_norm": 983.4556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.196, "query_norm": 1.3329, "queue_k_norm": 1.4044, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.6306, "sent_len_1": 66.9551, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0563, "stdk": 0.048, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.6545, "doc_norm": 1.4065, "encoder_q-embeddings": 709.2515, "encoder_q-layer.0": 485.2303, "encoder_q-layer.1": 512.6564, "encoder_q-layer.10": 629.7562, "encoder_q-layer.11": 1671.8193, "encoder_q-layer.2": 580.9656, "encoder_q-layer.3": 593.0785, "encoder_q-layer.4": 636.8517, "encoder_q-layer.5": 618.398, "encoder_q-layer.6": 670.3132, "encoder_q-layer.7": 713.1766, "encoder_q-layer.8": 671.8804, "encoder_q-layer.9": 617.8242, "epoch": 0.35, "inbatch_neg_score": 0.1982, "inbatch_pos_score": 0.7915, "learning_rate": 3.5444444444444445e-05, "loss": 3.6545, "norm_diff": 0.0831, "norm_loss": 0.0, "num_token_doc": 66.7202, "num_token_overlap": 14.5542, "num_token_query": 37.2976, "num_token_union": 65.3779, "num_word_context": 202.0959, "num_word_doc": 49.8056, "num_word_query": 27.9138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1139.5824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1993, "query_norm": 1.3235, "queue_k_norm": 1.4032, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2976, "sent_len_1": 66.7202, "sent_len_max_0": 127.98, "sent_len_max_1": 188.225, "stdk": 0.0481, "stdq": 0.0436, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.6157, "doc_norm": 1.4024, "encoder_q-embeddings": 1261.849, "encoder_q-layer.0": 836.0929, "encoder_q-layer.1": 876.2411, "encoder_q-layer.10": 608.3493, "encoder_q-layer.11": 1537.1031, "encoder_q-layer.2": 1014.1199, "encoder_q-layer.3": 1008.6694, "encoder_q-layer.4": 1063.7805, "encoder_q-layer.5": 989.3552, "encoder_q-layer.6": 686.2607, "encoder_q-layer.7": 670.2194, "encoder_q-layer.8": 661.6584, "encoder_q-layer.9": 580.2091, "epoch": 0.35, "inbatch_neg_score": 0.1977, "inbatch_pos_score": 0.8359, "learning_rate": 3.538888888888889e-05, "loss": 3.6157, "norm_diff": 0.0621, "norm_loss": 0.0, "num_token_doc": 66.7777, "num_token_overlap": 14.6064, "num_token_query": 37.3376, "num_token_union": 65.4037, "num_word_context": 202.2628, "num_word_doc": 49.8301, "num_word_query": 27.9576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1445.9303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1992, "query_norm": 1.3403, "queue_k_norm": 1.4029, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3376, "sent_len_1": 66.7777, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.8212, "stdk": 0.0479, "stdq": 0.0443, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.6242, "doc_norm": 1.405, "encoder_q-embeddings": 742.5876, "encoder_q-layer.0": 504.4868, "encoder_q-layer.1": 518.1122, "encoder_q-layer.10": 616.0266, "encoder_q-layer.11": 1528.927, "encoder_q-layer.2": 597.9104, "encoder_q-layer.3": 592.8149, "encoder_q-layer.4": 648.1786, "encoder_q-layer.5": 667.3936, "encoder_q-layer.6": 645.0776, "encoder_q-layer.7": 592.5309, "encoder_q-layer.8": 672.984, "encoder_q-layer.9": 583.1044, "epoch": 0.36, "inbatch_neg_score": 0.2029, "inbatch_pos_score": 0.8247, "learning_rate": 3.5333333333333336e-05, "loss": 3.6242, "norm_diff": 0.0628, "norm_loss": 0.0, "num_token_doc": 66.8501, "num_token_overlap": 14.5887, "num_token_query": 37.3224, "num_token_union": 65.4307, "num_word_context": 202.4315, "num_word_doc": 49.887, "num_word_query": 27.9448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1106.1036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2035, "query_norm": 1.3422, "queue_k_norm": 1.4044, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3224, "sent_len_1": 66.8501, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.8875, "stdk": 0.048, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.6238, "doc_norm": 1.4028, "encoder_q-embeddings": 1606.2423, "encoder_q-layer.0": 1277.9303, "encoder_q-layer.1": 1268.672, "encoder_q-layer.10": 639.4539, "encoder_q-layer.11": 1547.6183, "encoder_q-layer.2": 1489.936, "encoder_q-layer.3": 1623.3862, "encoder_q-layer.4": 1652.1707, "encoder_q-layer.5": 1445.9419, "encoder_q-layer.6": 1055.6804, "encoder_q-layer.7": 756.3701, "encoder_q-layer.8": 730.6953, "encoder_q-layer.9": 607.4998, "epoch": 0.36, "inbatch_neg_score": 0.2015, "inbatch_pos_score": 0.8091, "learning_rate": 3.527777777777778e-05, "loss": 3.6238, "norm_diff": 0.0697, "norm_loss": 0.0, "num_token_doc": 66.8393, "num_token_overlap": 14.5899, "num_token_query": 37.3436, "num_token_union": 65.44, "num_word_context": 202.4313, "num_word_doc": 49.8801, "num_word_query": 27.9737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1936.8376, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2012, "query_norm": 1.3331, "queue_k_norm": 1.4044, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3436, "sent_len_1": 66.8393, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2337, "stdk": 0.0479, "stdq": 0.0438, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.6256, "doc_norm": 1.3991, "encoder_q-embeddings": 1475.6356, "encoder_q-layer.0": 948.9407, "encoder_q-layer.1": 994.317, "encoder_q-layer.10": 1232.0701, "encoder_q-layer.11": 3050.7854, "encoder_q-layer.2": 1104.287, "encoder_q-layer.3": 1093.4628, "encoder_q-layer.4": 1070.063, "encoder_q-layer.5": 1013.2146, "encoder_q-layer.6": 1039.6464, "encoder_q-layer.7": 1083.3374, "encoder_q-layer.8": 1266.603, "encoder_q-layer.9": 1184.1251, "epoch": 0.36, "inbatch_neg_score": 0.1944, "inbatch_pos_score": 0.7969, "learning_rate": 3.522222222222222e-05, "loss": 3.6256, "norm_diff": 0.0887, "norm_loss": 0.0, "num_token_doc": 66.7123, "num_token_overlap": 14.5425, "num_token_query": 37.2463, "num_token_union": 65.3379, "num_word_context": 202.2791, "num_word_doc": 49.8274, "num_word_query": 27.8801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.0573, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1952, "query_norm": 1.3104, "queue_k_norm": 1.4016, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2463, "sent_len_1": 66.7123, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1287, "stdk": 0.0478, "stdq": 0.0431, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.6318, "doc_norm": 1.4041, "encoder_q-embeddings": 3145.0312, "encoder_q-layer.0": 2162.1965, "encoder_q-layer.1": 2222.7732, "encoder_q-layer.10": 1178.8174, "encoder_q-layer.11": 3046.0854, "encoder_q-layer.2": 2468.4436, "encoder_q-layer.3": 2440.5117, "encoder_q-layer.4": 2211.9805, "encoder_q-layer.5": 2311.5125, "encoder_q-layer.6": 1763.4578, "encoder_q-layer.7": 1449.9475, "encoder_q-layer.8": 1349.0269, "encoder_q-layer.9": 1183.1631, "epoch": 0.36, "inbatch_neg_score": 0.1925, "inbatch_pos_score": 0.8066, "learning_rate": 3.516666666666667e-05, "loss": 3.6318, "norm_diff": 0.0474, "norm_loss": 0.0, "num_token_doc": 66.8537, "num_token_overlap": 14.5904, "num_token_query": 37.235, "num_token_union": 65.382, "num_word_context": 202.5844, "num_word_doc": 49.8694, "num_word_query": 27.8691, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3332.385, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1929, "query_norm": 1.3568, "queue_k_norm": 1.4019, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.235, "sent_len_1": 66.8537, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.24, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.6027, "doc_norm": 1.3947, "encoder_q-embeddings": 1377.5923, "encoder_q-layer.0": 1008.9978, "encoder_q-layer.1": 1078.0267, "encoder_q-layer.10": 1410.8179, "encoder_q-layer.11": 3239.5139, "encoder_q-layer.2": 1184.3069, "encoder_q-layer.3": 1188.2354, "encoder_q-layer.4": 1269.7776, "encoder_q-layer.5": 1453.9199, "encoder_q-layer.6": 1412.5306, "encoder_q-layer.7": 1156.0682, "encoder_q-layer.8": 1379.1193, "encoder_q-layer.9": 1296.4043, "epoch": 0.36, "inbatch_neg_score": 0.1931, "inbatch_pos_score": 0.7891, "learning_rate": 3.511111111111111e-05, "loss": 3.6027, "norm_diff": 0.0263, "norm_loss": 0.0, "num_token_doc": 66.7856, "num_token_overlap": 14.6694, "num_token_query": 37.4698, "num_token_union": 65.4305, "num_word_context": 202.3846, "num_word_doc": 49.89, "num_word_query": 28.1053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2239.3283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1934, "query_norm": 1.3685, "queue_k_norm": 1.4015, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4698, "sent_len_1": 66.7856, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.4988, "stdk": 0.0476, "stdq": 0.0446, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.6445, "doc_norm": 1.3944, "encoder_q-embeddings": 1601.8508, "encoder_q-layer.0": 1126.1713, "encoder_q-layer.1": 1237.9634, "encoder_q-layer.10": 1186.9856, "encoder_q-layer.11": 3074.4675, "encoder_q-layer.2": 1471.8073, "encoder_q-layer.3": 1668.105, "encoder_q-layer.4": 1764.2939, "encoder_q-layer.5": 2146.978, "encoder_q-layer.6": 1682.4104, "encoder_q-layer.7": 1258.6361, "encoder_q-layer.8": 1263.0311, "encoder_q-layer.9": 1135.4282, "epoch": 0.36, "inbatch_neg_score": 0.1919, "inbatch_pos_score": 0.7871, "learning_rate": 3.505555555555556e-05, "loss": 3.6445, "norm_diff": 0.0676, "norm_loss": 0.0, "num_token_doc": 66.836, "num_token_overlap": 14.4818, "num_token_query": 37.1082, "num_token_union": 65.334, "num_word_context": 202.5694, "num_word_doc": 49.8509, "num_word_query": 27.7741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2507.8824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1915, "query_norm": 1.3269, "queue_k_norm": 1.402, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1082, "sent_len_1": 66.836, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6138, "stdk": 0.0476, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.6357, "doc_norm": 1.4004, "encoder_q-embeddings": 1051.485, "encoder_q-layer.0": 708.9469, "encoder_q-layer.1": 699.4522, "encoder_q-layer.10": 1122.416, "encoder_q-layer.11": 2874.3374, "encoder_q-layer.2": 739.476, "encoder_q-layer.3": 756.1299, "encoder_q-layer.4": 827.1286, "encoder_q-layer.5": 778.2966, "encoder_q-layer.6": 911.5631, "encoder_q-layer.7": 1040.9614, "encoder_q-layer.8": 1257.5748, "encoder_q-layer.9": 1116.6787, "epoch": 0.36, "inbatch_neg_score": 0.2006, "inbatch_pos_score": 0.8198, "learning_rate": 3.5e-05, "loss": 3.6357, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.4493, "num_token_overlap": 14.4985, "num_token_query": 37.136, "num_token_union": 65.1479, "num_word_context": 201.8488, "num_word_doc": 49.5652, "num_word_query": 27.7995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1831.2191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1992, "query_norm": 1.386, "queue_k_norm": 1.4004, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.136, "sent_len_1": 66.4493, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4112, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.6184, "doc_norm": 1.3949, "encoder_q-embeddings": 1504.9337, "encoder_q-layer.0": 998.2312, "encoder_q-layer.1": 1049.5017, "encoder_q-layer.10": 1068.4629, "encoder_q-layer.11": 2732.106, "encoder_q-layer.2": 1191.6014, "encoder_q-layer.3": 1190.4006, "encoder_q-layer.4": 1231.7448, "encoder_q-layer.5": 1153.4552, "encoder_q-layer.6": 1185.2983, "encoder_q-layer.7": 1218.2148, "encoder_q-layer.8": 1275.6879, "encoder_q-layer.9": 1094.9391, "epoch": 0.36, "inbatch_neg_score": 0.2007, "inbatch_pos_score": 0.8384, "learning_rate": 3.4944444444444446e-05, "loss": 3.6184, "norm_diff": 0.0114, "norm_loss": 0.0, "num_token_doc": 66.8461, "num_token_overlap": 14.5798, "num_token_query": 37.3461, "num_token_union": 65.4234, "num_word_context": 202.2895, "num_word_doc": 49.8424, "num_word_query": 27.9556, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2083.153, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2009, "query_norm": 1.3992, "queue_k_norm": 1.4008, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3461, "sent_len_1": 66.8461, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7925, "stdk": 0.0476, "stdq": 0.0446, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6059, "doc_norm": 1.3989, "encoder_q-embeddings": 2625.4771, "encoder_q-layer.0": 1821.4801, "encoder_q-layer.1": 2267.9434, "encoder_q-layer.10": 1249.7866, "encoder_q-layer.11": 3037.4785, "encoder_q-layer.2": 2551.9285, "encoder_q-layer.3": 2061.7595, "encoder_q-layer.4": 1988.2361, "encoder_q-layer.5": 1631.8351, "encoder_q-layer.6": 1501.2982, "encoder_q-layer.7": 1356.042, "encoder_q-layer.8": 1344.7355, "encoder_q-layer.9": 1202.0916, "epoch": 0.36, "inbatch_neg_score": 0.2179, "inbatch_pos_score": 0.8589, "learning_rate": 3.4888888888888895e-05, "loss": 3.6059, "norm_diff": 0.0254, "norm_loss": 0.0, "num_token_doc": 66.8786, "num_token_overlap": 14.6306, "num_token_query": 37.4181, "num_token_union": 65.4386, "num_word_context": 202.4474, "num_word_doc": 49.9245, "num_word_query": 28.0237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3028.3832, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.217, "query_norm": 1.4191, "queue_k_norm": 1.4028, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4181, "sent_len_1": 66.8786, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1387, "stdk": 0.0478, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.6004, "doc_norm": 1.3981, "encoder_q-embeddings": 1877.6633, "encoder_q-layer.0": 1289.8174, "encoder_q-layer.1": 1274.7233, "encoder_q-layer.10": 1138.9878, "encoder_q-layer.11": 2778.304, "encoder_q-layer.2": 1488.6453, "encoder_q-layer.3": 1530.025, "encoder_q-layer.4": 1475.1003, "encoder_q-layer.5": 1381.1365, "encoder_q-layer.6": 1217.8469, "encoder_q-layer.7": 1162.0808, "encoder_q-layer.8": 1279.9569, "encoder_q-layer.9": 1144.9062, "epoch": 0.36, "inbatch_neg_score": 0.2162, "inbatch_pos_score": 0.8535, "learning_rate": 3.483333333333334e-05, "loss": 3.6004, "norm_diff": 0.012, "norm_loss": 0.0, "num_token_doc": 66.9194, "num_token_overlap": 14.64, "num_token_query": 37.3586, "num_token_union": 65.4259, "num_word_context": 201.9839, "num_word_doc": 49.9616, "num_word_query": 27.9978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2334.9538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2151, "query_norm": 1.3995, "queue_k_norm": 1.4016, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3586, "sent_len_1": 66.9194, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2562, "stdk": 0.0477, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.5992, "doc_norm": 1.4016, "encoder_q-embeddings": 1308.3354, "encoder_q-layer.0": 872.7989, "encoder_q-layer.1": 898.2109, "encoder_q-layer.10": 1213.5634, "encoder_q-layer.11": 3146.3965, "encoder_q-layer.2": 1010.5353, "encoder_q-layer.3": 995.5854, "encoder_q-layer.4": 1059.8077, "encoder_q-layer.5": 1082.2527, "encoder_q-layer.6": 1090.181, "encoder_q-layer.7": 1130.8483, "encoder_q-layer.8": 1305.1642, "encoder_q-layer.9": 1193.183, "epoch": 0.37, "inbatch_neg_score": 0.219, "inbatch_pos_score": 0.8223, "learning_rate": 3.477777777777778e-05, "loss": 3.5992, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 66.7991, "num_token_overlap": 14.6066, "num_token_query": 37.2157, "num_token_union": 65.287, "num_word_context": 202.0994, "num_word_doc": 49.8026, "num_word_query": 27.8208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2073.4718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.219, "query_norm": 1.3922, "queue_k_norm": 1.4012, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2157, "sent_len_1": 66.7991, "sent_len_max_0": 128.0, "sent_len_max_1": 193.6113, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.6088, "doc_norm": 1.3999, "encoder_q-embeddings": 1171.4465, "encoder_q-layer.0": 765.5538, "encoder_q-layer.1": 776.6893, "encoder_q-layer.10": 1102.139, "encoder_q-layer.11": 2877.4907, "encoder_q-layer.2": 850.5864, "encoder_q-layer.3": 883.3632, "encoder_q-layer.4": 892.3997, "encoder_q-layer.5": 906.8457, "encoder_q-layer.6": 963.8789, "encoder_q-layer.7": 1121.0023, "encoder_q-layer.8": 1193.0848, "encoder_q-layer.9": 1092.8251, "epoch": 0.37, "inbatch_neg_score": 0.2212, "inbatch_pos_score": 0.8613, "learning_rate": 3.472222222222222e-05, "loss": 3.6088, "norm_diff": 0.0269, "norm_loss": 0.0, "num_token_doc": 66.6342, "num_token_overlap": 14.5919, "num_token_query": 37.2885, "num_token_union": 65.2477, "num_word_context": 202.469, "num_word_doc": 49.7472, "num_word_query": 27.9317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1894.9449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2207, "query_norm": 1.373, "queue_k_norm": 1.4016, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2885, "sent_len_1": 66.6342, "sent_len_max_0": 127.99, "sent_len_max_1": 187.6238, "stdk": 0.0477, "stdq": 0.0442, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6166, "doc_norm": 1.4015, "encoder_q-embeddings": 1956.9512, "encoder_q-layer.0": 1423.745, "encoder_q-layer.1": 1561.0328, "encoder_q-layer.10": 1233.0502, "encoder_q-layer.11": 3239.0498, "encoder_q-layer.2": 1839.0929, "encoder_q-layer.3": 1901.5431, "encoder_q-layer.4": 1991.3881, "encoder_q-layer.5": 2046.9683, "encoder_q-layer.6": 1868.436, "encoder_q-layer.7": 1599.9275, "encoder_q-layer.8": 1297.8868, "encoder_q-layer.9": 1183.6263, "epoch": 0.37, "inbatch_neg_score": 0.2113, "inbatch_pos_score": 0.8232, "learning_rate": 3.466666666666667e-05, "loss": 3.6166, "norm_diff": 0.0582, "norm_loss": 0.0, "num_token_doc": 66.737, "num_token_overlap": 14.5682, "num_token_query": 37.2545, "num_token_union": 65.3183, "num_word_context": 202.2247, "num_word_doc": 49.8029, "num_word_query": 27.9093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2743.1812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2126, "query_norm": 1.3432, "queue_k_norm": 1.4012, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2545, "sent_len_1": 66.737, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.305, "stdk": 0.0478, "stdq": 0.0438, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.587, "doc_norm": 1.403, "encoder_q-embeddings": 1152.1072, "encoder_q-layer.0": 748.3509, "encoder_q-layer.1": 759.4722, "encoder_q-layer.10": 1132.2053, "encoder_q-layer.11": 3046.5303, "encoder_q-layer.2": 802.3851, "encoder_q-layer.3": 822.4302, "encoder_q-layer.4": 850.0463, "encoder_q-layer.5": 846.7116, "encoder_q-layer.6": 933.2179, "encoder_q-layer.7": 998.4041, "encoder_q-layer.8": 1174.2455, "encoder_q-layer.9": 1057.3047, "epoch": 0.37, "inbatch_neg_score": 0.2138, "inbatch_pos_score": 0.8218, "learning_rate": 3.4611111111111114e-05, "loss": 3.587, "norm_diff": 0.0762, "norm_loss": 0.0, "num_token_doc": 66.8035, "num_token_overlap": 14.5096, "num_token_query": 37.0806, "num_token_union": 65.3418, "num_word_context": 202.4657, "num_word_doc": 49.8984, "num_word_query": 27.7587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1875.039, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2133, "query_norm": 1.3268, "queue_k_norm": 1.4037, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.0806, "sent_len_1": 66.8035, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.7537, "stdk": 0.0479, "stdq": 0.0434, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.5974, "doc_norm": 1.4011, "encoder_q-embeddings": 2002.906, "encoder_q-layer.0": 1343.4076, "encoder_q-layer.1": 1602.46, "encoder_q-layer.10": 1131.5494, "encoder_q-layer.11": 3300.2292, "encoder_q-layer.2": 1763.0532, "encoder_q-layer.3": 1883.0212, "encoder_q-layer.4": 2130.0908, "encoder_q-layer.5": 2103.9907, "encoder_q-layer.6": 1746.0093, "encoder_q-layer.7": 1412.5066, "encoder_q-layer.8": 1360.8296, "encoder_q-layer.9": 1191.0551, "epoch": 0.37, "inbatch_neg_score": 0.2068, "inbatch_pos_score": 0.8252, "learning_rate": 3.4555555555555556e-05, "loss": 3.5974, "norm_diff": 0.0636, "norm_loss": 0.0, "num_token_doc": 66.7277, "num_token_overlap": 14.6761, "num_token_query": 37.6242, "num_token_union": 65.4945, "num_word_context": 202.2916, "num_word_doc": 49.7951, "num_word_query": 28.1837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2751.9712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2061, "query_norm": 1.3375, "queue_k_norm": 1.4028, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.6242, "sent_len_1": 66.7277, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0737, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.5881, "doc_norm": 1.4031, "encoder_q-embeddings": 1413.9712, "encoder_q-layer.0": 971.1175, "encoder_q-layer.1": 1026.157, "encoder_q-layer.10": 1152.3254, "encoder_q-layer.11": 3038.4937, "encoder_q-layer.2": 1167.7505, "encoder_q-layer.3": 1116.1639, "encoder_q-layer.4": 1167.8057, "encoder_q-layer.5": 1217.11, "encoder_q-layer.6": 1195.4836, "encoder_q-layer.7": 1174.1581, "encoder_q-layer.8": 1258.8745, "encoder_q-layer.9": 1148.5955, "epoch": 0.37, "inbatch_neg_score": 0.2067, "inbatch_pos_score": 0.8545, "learning_rate": 3.45e-05, "loss": 3.5881, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.8578, "num_token_overlap": 14.6179, "num_token_query": 37.4287, "num_token_union": 65.4784, "num_word_context": 202.238, "num_word_doc": 49.9217, "num_word_query": 28.0257, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2103.5245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2054, "query_norm": 1.3552, "queue_k_norm": 1.4034, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4287, "sent_len_1": 66.8578, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.5525, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6062, "doc_norm": 1.4043, "encoder_q-embeddings": 1368.9379, "encoder_q-layer.0": 899.8935, "encoder_q-layer.1": 910.5665, "encoder_q-layer.10": 1158.8428, "encoder_q-layer.11": 3148.3872, "encoder_q-layer.2": 1043.2937, "encoder_q-layer.3": 1058.613, "encoder_q-layer.4": 1107.2667, "encoder_q-layer.5": 1107.2808, "encoder_q-layer.6": 1115.078, "encoder_q-layer.7": 1166.1096, "encoder_q-layer.8": 1302.3074, "encoder_q-layer.9": 1163.7815, "epoch": 0.37, "inbatch_neg_score": 0.202, "inbatch_pos_score": 0.8286, "learning_rate": 3.444444444444445e-05, "loss": 3.6062, "norm_diff": 0.0509, "norm_loss": 0.0, "num_token_doc": 66.8271, "num_token_overlap": 14.5758, "num_token_query": 37.3147, "num_token_union": 65.4253, "num_word_context": 202.3423, "num_word_doc": 49.8579, "num_word_query": 27.9675, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2077.6902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2009, "query_norm": 1.3534, "queue_k_norm": 1.4035, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3147, "sent_len_1": 66.8271, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1912, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.5974, "doc_norm": 1.4037, "encoder_q-embeddings": 1039.4291, "encoder_q-layer.0": 689.4263, "encoder_q-layer.1": 721.1838, "encoder_q-layer.10": 1244.4808, "encoder_q-layer.11": 2992.2529, "encoder_q-layer.2": 794.7949, "encoder_q-layer.3": 798.1435, "encoder_q-layer.4": 853.3532, "encoder_q-layer.5": 888.9612, "encoder_q-layer.6": 958.7759, "encoder_q-layer.7": 1069.2484, "encoder_q-layer.8": 1240.0485, "encoder_q-layer.9": 1152.8315, "epoch": 0.37, "inbatch_neg_score": 0.1933, "inbatch_pos_score": 0.7949, "learning_rate": 3.438888888888889e-05, "loss": 3.5974, "norm_diff": 0.0942, "norm_loss": 0.0, "num_token_doc": 66.6033, "num_token_overlap": 14.6024, "num_token_query": 37.3692, "num_token_union": 65.26, "num_word_context": 202.2502, "num_word_doc": 49.6813, "num_word_query": 27.9623, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1894.6107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1935, "query_norm": 1.3094, "queue_k_norm": 1.4032, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3692, "sent_len_1": 66.6033, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0312, "stdk": 0.0479, "stdq": 0.0432, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.6181, "doc_norm": 1.3933, "encoder_q-embeddings": 1289.0139, "encoder_q-layer.0": 883.1896, "encoder_q-layer.1": 946.1634, "encoder_q-layer.10": 1113.3588, "encoder_q-layer.11": 2957.5977, "encoder_q-layer.2": 1046.656, "encoder_q-layer.3": 1099.1282, "encoder_q-layer.4": 1078.2681, "encoder_q-layer.5": 1083.1024, "encoder_q-layer.6": 1144.415, "encoder_q-layer.7": 1138.7103, "encoder_q-layer.8": 1249.9763, "encoder_q-layer.9": 1130.2213, "epoch": 0.37, "inbatch_neg_score": 0.191, "inbatch_pos_score": 0.7827, "learning_rate": 3.433333333333333e-05, "loss": 3.6181, "norm_diff": 0.0604, "norm_loss": 0.0, "num_token_doc": 66.7531, "num_token_overlap": 14.5699, "num_token_query": 37.1808, "num_token_union": 65.3107, "num_word_context": 202.1118, "num_word_doc": 49.8301, "num_word_query": 27.8432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.7972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1899, "query_norm": 1.3329, "queue_k_norm": 1.4048, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1808, "sent_len_1": 66.7531, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7763, "stdk": 0.0475, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5949, "doc_norm": 1.4067, "encoder_q-embeddings": 7393.3442, "encoder_q-layer.0": 5427.3379, "encoder_q-layer.1": 5601.2051, "encoder_q-layer.10": 1221.3308, "encoder_q-layer.11": 3049.7197, "encoder_q-layer.2": 6235.2358, "encoder_q-layer.3": 7222.0366, "encoder_q-layer.4": 7760.8901, "encoder_q-layer.5": 7898.4907, "encoder_q-layer.6": 7740.5171, "encoder_q-layer.7": 6582.0601, "encoder_q-layer.8": 2271.4314, "encoder_q-layer.9": 1263.7577, "epoch": 0.37, "inbatch_neg_score": 0.1841, "inbatch_pos_score": 0.811, "learning_rate": 3.427777777777778e-05, "loss": 3.5949, "norm_diff": 0.0755, "norm_loss": 0.0, "num_token_doc": 66.7064, "num_token_overlap": 14.6181, "num_token_query": 37.2827, "num_token_union": 65.268, "num_word_context": 201.8643, "num_word_doc": 49.7712, "num_word_query": 27.9141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8849.5363, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1836, "query_norm": 1.3312, "queue_k_norm": 1.4038, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2827, "sent_len_1": 66.7064, "sent_len_max_0": 128.0, "sent_len_max_1": 187.76, "stdk": 0.048, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.5987, "doc_norm": 1.3979, "encoder_q-embeddings": 2476.1497, "encoder_q-layer.0": 1779.5723, "encoder_q-layer.1": 1971.618, "encoder_q-layer.10": 1169.1388, "encoder_q-layer.11": 3051.6436, "encoder_q-layer.2": 2091.9099, "encoder_q-layer.3": 2334.9543, "encoder_q-layer.4": 2635.0449, "encoder_q-layer.5": 2144.3457, "encoder_q-layer.6": 2129.6121, "encoder_q-layer.7": 1909.0515, "encoder_q-layer.8": 1591.634, "encoder_q-layer.9": 1172.1732, "epoch": 0.37, "inbatch_neg_score": 0.1818, "inbatch_pos_score": 0.7852, "learning_rate": 3.4222222222222224e-05, "loss": 3.5987, "norm_diff": 0.071, "norm_loss": 0.0, "num_token_doc": 66.8146, "num_token_overlap": 14.6081, "num_token_query": 37.4101, "num_token_union": 65.4644, "num_word_context": 202.2363, "num_word_doc": 49.8769, "num_word_query": 28.0224, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3163.9316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1816, "query_norm": 1.3269, "queue_k_norm": 1.4011, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4101, "sent_len_1": 66.8146, "sent_len_max_0": 127.99, "sent_len_max_1": 188.5337, "stdk": 0.0478, "stdq": 0.0439, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5856, "doc_norm": 1.4019, "encoder_q-embeddings": 1106.3799, "encoder_q-layer.0": 707.6121, "encoder_q-layer.1": 733.0482, "encoder_q-layer.10": 1221.6249, "encoder_q-layer.11": 2997.7749, "encoder_q-layer.2": 777.3178, "encoder_q-layer.3": 777.9727, "encoder_q-layer.4": 808.521, "encoder_q-layer.5": 828.0726, "encoder_q-layer.6": 928.4627, "encoder_q-layer.7": 1013.9598, "encoder_q-layer.8": 1298.6328, "encoder_q-layer.9": 1213.2738, "epoch": 0.38, "inbatch_neg_score": 0.1802, "inbatch_pos_score": 0.791, "learning_rate": 3.4166666666666666e-05, "loss": 3.5856, "norm_diff": 0.0858, "norm_loss": 0.0, "num_token_doc": 66.7415, "num_token_overlap": 14.6594, "num_token_query": 37.524, "num_token_union": 65.4562, "num_word_context": 202.5394, "num_word_doc": 49.7873, "num_word_query": 28.1051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1862.1088, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1793, "query_norm": 1.3161, "queue_k_norm": 1.4007, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.524, "sent_len_1": 66.7415, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0675, "stdk": 0.0479, "stdq": 0.0436, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5742, "doc_norm": 1.4015, "encoder_q-embeddings": 4444.4883, "encoder_q-layer.0": 3094.6399, "encoder_q-layer.1": 3351.4868, "encoder_q-layer.10": 2337.7966, "encoder_q-layer.11": 5679.9849, "encoder_q-layer.2": 4343.1772, "encoder_q-layer.3": 4497.8384, "encoder_q-layer.4": 4843.3892, "encoder_q-layer.5": 4877.3477, "encoder_q-layer.6": 5363.5732, "encoder_q-layer.7": 3884.6924, "encoder_q-layer.8": 2598.1155, "encoder_q-layer.9": 2302.0188, "epoch": 0.38, "inbatch_neg_score": 0.1804, "inbatch_pos_score": 0.7969, "learning_rate": 3.411111111111111e-05, "loss": 3.5742, "norm_diff": 0.0683, "norm_loss": 0.0, "num_token_doc": 66.8906, "num_token_overlap": 14.6214, "num_token_query": 37.3768, "num_token_union": 65.4441, "num_word_context": 202.4253, "num_word_doc": 49.9037, "num_word_query": 27.9861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6213.9291, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1793, "query_norm": 1.3332, "queue_k_norm": 1.4009, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3768, "sent_len_1": 66.8906, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7537, "stdk": 0.0479, "stdq": 0.0441, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.6017, "doc_norm": 1.4007, "encoder_q-embeddings": 2220.8059, "encoder_q-layer.0": 1508.4382, "encoder_q-layer.1": 1514.7968, "encoder_q-layer.10": 2133.2832, "encoder_q-layer.11": 5532.3843, "encoder_q-layer.2": 1684.8871, "encoder_q-layer.3": 1756.3116, "encoder_q-layer.4": 1833.9474, "encoder_q-layer.5": 1884.973, "encoder_q-layer.6": 2065.3323, "encoder_q-layer.7": 2035.3627, "encoder_q-layer.8": 2446.1877, "encoder_q-layer.9": 2145.7495, "epoch": 0.38, "inbatch_neg_score": 0.1835, "inbatch_pos_score": 0.8286, "learning_rate": 3.405555555555556e-05, "loss": 3.6017, "norm_diff": 0.0456, "norm_loss": 0.0, "num_token_doc": 66.988, "num_token_overlap": 14.567, "num_token_query": 37.3888, "num_token_union": 65.541, "num_word_context": 202.6413, "num_word_doc": 49.9564, "num_word_query": 27.9826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3679.6593, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1832, "query_norm": 1.3551, "queue_k_norm": 1.3996, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3888, "sent_len_1": 66.988, "sent_len_max_0": 128.0, "sent_len_max_1": 192.1912, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.5704, "doc_norm": 1.4025, "encoder_q-embeddings": 3162.522, "encoder_q-layer.0": 2467.2773, "encoder_q-layer.1": 2538.3335, "encoder_q-layer.10": 2402.2693, "encoder_q-layer.11": 6212.6079, "encoder_q-layer.2": 2817.981, "encoder_q-layer.3": 2809.0488, "encoder_q-layer.4": 3154.3811, "encoder_q-layer.5": 2896.6245, "encoder_q-layer.6": 2975.0051, "encoder_q-layer.7": 2343.7336, "encoder_q-layer.8": 2431.2866, "encoder_q-layer.9": 2231.958, "epoch": 0.38, "inbatch_neg_score": 0.1805, "inbatch_pos_score": 0.8101, "learning_rate": 3.4000000000000007e-05, "loss": 3.5704, "norm_diff": 0.0646, "norm_loss": 0.0, "num_token_doc": 66.912, "num_token_overlap": 14.6664, "num_token_query": 37.4321, "num_token_union": 65.4283, "num_word_context": 202.5297, "num_word_doc": 49.947, "num_word_query": 28.02, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4682.0685, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1816, "query_norm": 1.3379, "queue_k_norm": 1.3996, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4321, "sent_len_1": 66.912, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.36, "stdk": 0.048, "stdq": 0.044, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.572, "doc_norm": 1.3986, "encoder_q-embeddings": 1349.4493, "encoder_q-layer.0": 957.6045, "encoder_q-layer.1": 942.0649, "encoder_q-layer.10": 1128.2473, "encoder_q-layer.11": 3000.7612, "encoder_q-layer.2": 1033.0077, "encoder_q-layer.3": 1073.5166, "encoder_q-layer.4": 1111.0193, "encoder_q-layer.5": 1090.6631, "encoder_q-layer.6": 1198.3113, "encoder_q-layer.7": 1293.3004, "encoder_q-layer.8": 1224.22, "encoder_q-layer.9": 1131.7593, "epoch": 0.38, "inbatch_neg_score": 0.1894, "inbatch_pos_score": 0.7979, "learning_rate": 3.394444444444444e-05, "loss": 3.572, "norm_diff": 0.058, "norm_loss": 0.0, "num_token_doc": 66.6003, "num_token_overlap": 14.5167, "num_token_query": 37.1811, "num_token_union": 65.2804, "num_word_context": 202.2519, "num_word_doc": 49.7267, "num_word_query": 27.8387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2081.8742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1873, "query_norm": 1.3406, "queue_k_norm": 1.3986, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1811, "sent_len_1": 66.6003, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1912, "stdk": 0.0478, "stdq": 0.0438, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.6038, "doc_norm": 1.3998, "encoder_q-embeddings": 2123.824, "encoder_q-layer.0": 1646.2686, "encoder_q-layer.1": 1697.4379, "encoder_q-layer.10": 1109.3611, "encoder_q-layer.11": 2850.3054, "encoder_q-layer.2": 1515.9404, "encoder_q-layer.3": 1511.8317, "encoder_q-layer.4": 1553.1976, "encoder_q-layer.5": 1320.3246, "encoder_q-layer.6": 1292.8792, "encoder_q-layer.7": 1180.3025, "encoder_q-layer.8": 1313.7057, "encoder_q-layer.9": 1165.3253, "epoch": 0.38, "inbatch_neg_score": 0.1826, "inbatch_pos_score": 0.8081, "learning_rate": 3.388888888888889e-05, "loss": 3.6038, "norm_diff": 0.0433, "norm_loss": 0.0, "num_token_doc": 66.7458, "num_token_overlap": 14.5712, "num_token_query": 37.2552, "num_token_union": 65.3508, "num_word_context": 202.4214, "num_word_doc": 49.8236, "num_word_query": 27.8692, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2483.105, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1831, "query_norm": 1.3565, "queue_k_norm": 1.3972, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2552, "sent_len_1": 66.7458, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.0825, "stdk": 0.048, "stdq": 0.0442, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5886, "doc_norm": 1.4026, "encoder_q-embeddings": 1230.5349, "encoder_q-layer.0": 828.9811, "encoder_q-layer.1": 857.7006, "encoder_q-layer.10": 1148.5542, "encoder_q-layer.11": 2850.8838, "encoder_q-layer.2": 950.0205, "encoder_q-layer.3": 979.9375, "encoder_q-layer.4": 1005.2508, "encoder_q-layer.5": 953.7886, "encoder_q-layer.6": 1016.916, "encoder_q-layer.7": 1049.4995, "encoder_q-layer.8": 1224.5811, "encoder_q-layer.9": 1142.83, "epoch": 0.38, "inbatch_neg_score": 0.1925, "inbatch_pos_score": 0.8193, "learning_rate": 3.3833333333333334e-05, "loss": 3.5886, "norm_diff": 0.0283, "norm_loss": 0.0, "num_token_doc": 66.875, "num_token_overlap": 14.5772, "num_token_query": 37.3604, "num_token_union": 65.4594, "num_word_context": 202.6818, "num_word_doc": 49.8452, "num_word_query": 27.9537, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1941.1042, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1925, "query_norm": 1.3743, "queue_k_norm": 1.3988, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3604, "sent_len_1": 66.875, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9938, "stdk": 0.0481, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5933, "doc_norm": 1.3972, "encoder_q-embeddings": 2912.0061, "encoder_q-layer.0": 2081.0767, "encoder_q-layer.1": 2031.3016, "encoder_q-layer.10": 1276.7778, "encoder_q-layer.11": 2961.5347, "encoder_q-layer.2": 2458.8682, "encoder_q-layer.3": 2406.7812, "encoder_q-layer.4": 2590.8499, "encoder_q-layer.5": 2072.9783, "encoder_q-layer.6": 1221.1958, "encoder_q-layer.7": 1159.0331, "encoder_q-layer.8": 1325.9325, "encoder_q-layer.9": 1205.7675, "epoch": 0.38, "inbatch_neg_score": 0.1997, "inbatch_pos_score": 0.8364, "learning_rate": 3.377777777777778e-05, "loss": 3.5933, "norm_diff": 0.0141, "norm_loss": 0.0, "num_token_doc": 66.628, "num_token_overlap": 14.5377, "num_token_query": 37.2139, "num_token_union": 65.2702, "num_word_context": 201.9044, "num_word_doc": 49.7, "num_word_query": 27.872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3181.2328, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1992, "query_norm": 1.3841, "queue_k_norm": 1.3979, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2139, "sent_len_1": 66.628, "sent_len_max_0": 127.995, "sent_len_max_1": 190.3038, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.5845, "doc_norm": 1.398, "encoder_q-embeddings": 2409.0266, "encoder_q-layer.0": 1736.8812, "encoder_q-layer.1": 1942.1426, "encoder_q-layer.10": 1323.6356, "encoder_q-layer.11": 3061.1921, "encoder_q-layer.2": 2182.2476, "encoder_q-layer.3": 2024.6874, "encoder_q-layer.4": 2065.1353, "encoder_q-layer.5": 1753.1633, "encoder_q-layer.6": 1596.8381, "encoder_q-layer.7": 1486.5043, "encoder_q-layer.8": 1344.4392, "encoder_q-layer.9": 1169.447, "epoch": 0.38, "inbatch_neg_score": 0.2041, "inbatch_pos_score": 0.8052, "learning_rate": 3.3722222222222225e-05, "loss": 3.5845, "norm_diff": 0.0408, "norm_loss": 0.0, "num_token_doc": 66.929, "num_token_overlap": 14.6528, "num_token_query": 37.4003, "num_token_union": 65.444, "num_word_context": 202.9116, "num_word_doc": 49.9138, "num_word_query": 27.9946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2936.5187, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2029, "query_norm": 1.3572, "queue_k_norm": 1.3988, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4003, "sent_len_1": 66.929, "sent_len_max_0": 128.0, "sent_len_max_1": 189.09, "stdk": 0.0479, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.5655, "doc_norm": 1.4015, "encoder_q-embeddings": 1574.0884, "encoder_q-layer.0": 1042.2611, "encoder_q-layer.1": 1056.0742, "encoder_q-layer.10": 1168.825, "encoder_q-layer.11": 3078.366, "encoder_q-layer.2": 1215.1259, "encoder_q-layer.3": 1137.2499, "encoder_q-layer.4": 1164.3256, "encoder_q-layer.5": 1122.5571, "encoder_q-layer.6": 1062.0151, "encoder_q-layer.7": 1091.5261, "encoder_q-layer.8": 1213.3911, "encoder_q-layer.9": 1153.3721, "epoch": 0.38, "inbatch_neg_score": 0.2026, "inbatch_pos_score": 0.8169, "learning_rate": 3.366666666666667e-05, "loss": 3.5655, "norm_diff": 0.0726, "norm_loss": 0.0, "num_token_doc": 66.7744, "num_token_overlap": 14.5843, "num_token_query": 37.3354, "num_token_union": 65.3979, "num_word_context": 202.3482, "num_word_doc": 49.848, "num_word_query": 27.9561, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2182.2383, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2012, "query_norm": 1.3289, "queue_k_norm": 1.3985, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3354, "sent_len_1": 66.7744, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4487, "stdk": 0.048, "stdq": 0.0434, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.5849, "doc_norm": 1.4, "encoder_q-embeddings": 1605.83, "encoder_q-layer.0": 1099.2605, "encoder_q-layer.1": 1198.4045, "encoder_q-layer.10": 1172.4954, "encoder_q-layer.11": 3351.5784, "encoder_q-layer.2": 1380.8988, "encoder_q-layer.3": 1456.8674, "encoder_q-layer.4": 1551.1545, "encoder_q-layer.5": 1498.282, "encoder_q-layer.6": 1459.2003, "encoder_q-layer.7": 1454.7954, "encoder_q-layer.8": 1365.4409, "encoder_q-layer.9": 1194.8584, "epoch": 0.39, "inbatch_neg_score": 0.2022, "inbatch_pos_score": 0.7979, "learning_rate": 3.3611111111111116e-05, "loss": 3.5849, "norm_diff": 0.063, "norm_loss": 0.0, "num_token_doc": 66.9377, "num_token_overlap": 14.6241, "num_token_query": 37.4255, "num_token_union": 65.5102, "num_word_context": 202.4924, "num_word_doc": 49.9061, "num_word_query": 28.0386, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2500.047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2014, "query_norm": 1.337, "queue_k_norm": 1.4002, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4255, "sent_len_1": 66.9377, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.3013, "stdk": 0.0479, "stdq": 0.0437, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.5656, "doc_norm": 1.3996, "encoder_q-embeddings": 1638.307, "encoder_q-layer.0": 1133.4778, "encoder_q-layer.1": 1220.1287, "encoder_q-layer.10": 1205.0543, "encoder_q-layer.11": 2980.5149, "encoder_q-layer.2": 1360.5851, "encoder_q-layer.3": 1449.0753, "encoder_q-layer.4": 1550.056, "encoder_q-layer.5": 1527.2013, "encoder_q-layer.6": 1514.0203, "encoder_q-layer.7": 1478.2395, "encoder_q-layer.8": 1280.3018, "encoder_q-layer.9": 1133.4316, "epoch": 0.39, "inbatch_neg_score": 0.2042, "inbatch_pos_score": 0.8018, "learning_rate": 3.355555555555556e-05, "loss": 3.5656, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 66.8386, "num_token_overlap": 14.6725, "num_token_query": 37.4167, "num_token_union": 65.3417, "num_word_context": 202.1543, "num_word_doc": 49.8772, "num_word_query": 28.0168, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2348.6918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2042, "query_norm": 1.3326, "queue_k_norm": 1.4002, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4167, "sent_len_1": 66.8386, "sent_len_max_0": 127.99, "sent_len_max_1": 191.4787, "stdk": 0.0479, "stdq": 0.0437, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.587, "doc_norm": 1.408, "encoder_q-embeddings": 13326.5996, "encoder_q-layer.0": 10222.25, "encoder_q-layer.1": 9865.373, "encoder_q-layer.10": 1173.8357, "encoder_q-layer.11": 3327.8767, "encoder_q-layer.2": 12197.2051, "encoder_q-layer.3": 11846.083, "encoder_q-layer.4": 12963.043, "encoder_q-layer.5": 9036.3047, "encoder_q-layer.6": 8003.9316, "encoder_q-layer.7": 4176.8071, "encoder_q-layer.8": 2583.54, "encoder_q-layer.9": 1321.9414, "epoch": 0.39, "inbatch_neg_score": 0.2063, "inbatch_pos_score": 0.8301, "learning_rate": 3.35e-05, "loss": 3.587, "norm_diff": 0.0548, "norm_loss": 0.0, "num_token_doc": 66.5185, "num_token_overlap": 14.5217, "num_token_query": 37.1104, "num_token_union": 65.15, "num_word_context": 202.055, "num_word_doc": 49.6313, "num_word_query": 27.782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13691.165, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2053, "query_norm": 1.3533, "queue_k_norm": 1.3999, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1104, "sent_len_1": 66.5185, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.1662, "stdk": 0.0482, "stdq": 0.0446, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.564, "doc_norm": 1.4041, "encoder_q-embeddings": 1425.8718, "encoder_q-layer.0": 926.4199, "encoder_q-layer.1": 1003.8007, "encoder_q-layer.10": 1395.0238, "encoder_q-layer.11": 3388.1462, "encoder_q-layer.2": 1134.8656, "encoder_q-layer.3": 1198.1896, "encoder_q-layer.4": 1265.0717, "encoder_q-layer.5": 1167.6877, "encoder_q-layer.6": 1233.3516, "encoder_q-layer.7": 1331.5164, "encoder_q-layer.8": 1556.3666, "encoder_q-layer.9": 1373.7181, "epoch": 0.39, "inbatch_neg_score": 0.2084, "inbatch_pos_score": 0.8521, "learning_rate": 3.3444444444444443e-05, "loss": 3.564, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.88, "num_token_overlap": 14.7054, "num_token_query": 37.5623, "num_token_union": 65.4786, "num_word_context": 202.2461, "num_word_doc": 49.8459, "num_word_query": 28.1026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2344.3994, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2087, "query_norm": 1.3773, "queue_k_norm": 1.4015, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5623, "sent_len_1": 66.88, "sent_len_max_0": 128.0, "sent_len_max_1": 192.8088, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.5913, "doc_norm": 1.4015, "encoder_q-embeddings": 1647.2213, "encoder_q-layer.0": 1153.9636, "encoder_q-layer.1": 1088.9808, "encoder_q-layer.10": 1180.7949, "encoder_q-layer.11": 2878.4839, "encoder_q-layer.2": 1178.0302, "encoder_q-layer.3": 1164.2896, "encoder_q-layer.4": 1311.3069, "encoder_q-layer.5": 1162.7651, "encoder_q-layer.6": 1237.5563, "encoder_q-layer.7": 1170.3787, "encoder_q-layer.8": 1278.9132, "encoder_q-layer.9": 1145.6622, "epoch": 0.39, "inbatch_neg_score": 0.2068, "inbatch_pos_score": 0.8325, "learning_rate": 3.338888888888889e-05, "loss": 3.5913, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.7159, "num_token_overlap": 14.5457, "num_token_query": 37.3096, "num_token_union": 65.3597, "num_word_context": 202.3765, "num_word_doc": 49.7542, "num_word_query": 27.9363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2212.4968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2072, "query_norm": 1.3458, "queue_k_norm": 1.4003, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3096, "sent_len_1": 66.7159, "sent_len_max_0": 127.995, "sent_len_max_1": 191.4675, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.5823, "doc_norm": 1.4022, "encoder_q-embeddings": 1962.7736, "encoder_q-layer.0": 1347.0823, "encoder_q-layer.1": 1589.9243, "encoder_q-layer.10": 1268.028, "encoder_q-layer.11": 3258.6655, "encoder_q-layer.2": 1784.7823, "encoder_q-layer.3": 1737.6807, "encoder_q-layer.4": 1843.5336, "encoder_q-layer.5": 1650.7662, "encoder_q-layer.6": 1670.6444, "encoder_q-layer.7": 1460.6786, "encoder_q-layer.8": 1374.5144, "encoder_q-layer.9": 1191.5924, "epoch": 0.39, "inbatch_neg_score": 0.2062, "inbatch_pos_score": 0.7871, "learning_rate": 3.3333333333333335e-05, "loss": 3.5823, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.5971, "num_token_overlap": 14.5247, "num_token_query": 37.0879, "num_token_union": 65.1375, "num_word_context": 202.0361, "num_word_doc": 49.7108, "num_word_query": 27.7536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2682.6766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2061, "query_norm": 1.2971, "queue_k_norm": 1.401, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.0879, "sent_len_1": 66.5971, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7962, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 28.3256, "dev_samples_per_second": 2.259, "dev_steps_per_second": 0.035, "epoch": 0.39, "step": 40000, "test_accuracy": 92.61474609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4327126741409302, "test_doc_norm": 1.3773783445358276, "test_inbatch_neg_score": 0.5183354616165161, "test_inbatch_pos_score": 1.3953579664230347, "test_loss": 0.4327126741409302, "test_loss_align": 1.098644495010376, "test_loss_unif": 3.9060373306274414, "test_loss_unif_q@queue": 3.9060370922088623, "test_norm_diff": 0.02373998798429966, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.196208655834198, "test_query_norm": 1.4006659984588623, "test_queue_k_norm": 1.400918960571289, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04207324609160423, "test_stdq": 0.04082862660288811, "test_stdqueue_k": 0.04797288775444031, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.3256, "dev_samples_per_second": 2.259, "dev_steps_per_second": 0.035, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.36016, "eval_beir-arguana_recall@10": 0.61451, "eval_beir-arguana_recall@100": 0.91821, "eval_beir-arguana_recall@20": 0.75178, "eval_beir-avg_ndcg@10": 0.35747425, "eval_beir-avg_recall@10": 0.427126, "eval_beir-avg_recall@100": 0.611179, "eval_beir-avg_recall@20": 0.48629658333333337, "eval_beir-cqadupstack_ndcg@10": 0.24341249999999995, "eval_beir-cqadupstack_recall@10": 0.33050999999999997, "eval_beir-cqadupstack_recall@100": 0.55926, "eval_beir-cqadupstack_recall@20": 0.39349583333333343, "eval_beir-fiqa_ndcg@10": 0.21161, "eval_beir-fiqa_recall@10": 0.2637, "eval_beir-fiqa_recall@100": 0.527, "eval_beir-fiqa_recall@20": 0.3413, "eval_beir-nfcorpus_ndcg@10": 0.28276, "eval_beir-nfcorpus_recall@10": 0.13773, "eval_beir-nfcorpus_recall@100": 0.27297, "eval_beir-nfcorpus_recall@20": 0.16613, "eval_beir-nq_ndcg@10": 0.24353, "eval_beir-nq_recall@10": 0.40011, "eval_beir-nq_recall@100": 0.74836, "eval_beir-nq_recall@20": 0.52093, "eval_beir-quora_ndcg@10": 0.73215, "eval_beir-quora_recall@10": 0.85521, "eval_beir-quora_recall@100": 0.97037, "eval_beir-quora_recall@20": 0.90333, "eval_beir-scidocs_ndcg@10": 0.13037, "eval_beir-scidocs_recall@10": 0.13783, "eval_beir-scidocs_recall@100": 0.32863, "eval_beir-scidocs_recall@20": 0.19177, "eval_beir-scifact_ndcg@10": 0.61437, "eval_beir-scifact_recall@10": 0.769, "eval_beir-scifact_recall@100": 0.91189, "eval_beir-scifact_recall@20": 0.81911, "eval_beir-trec-covid_ndcg@10": 0.58304, "eval_beir-trec-covid_recall@10": 0.628, "eval_beir-trec-covid_recall@100": 0.4398, "eval_beir-trec-covid_recall@20": 0.584, "eval_beir-webis-touche2020_ndcg@10": 0.17334, "eval_beir-webis-touche2020_recall@10": 0.13466, "eval_beir-webis-touche2020_recall@100": 0.4353, "eval_beir-webis-touche2020_recall@20": 0.19112, "eval_senteval-avg_sts": 0.7486334662263476, "eval_senteval-sickr_spearman": 0.7220013554181447, "eval_senteval-stsb_spearman": 0.7752655770345506, "step": 40000, "test_accuracy": 92.61474609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4327126741409302, "test_doc_norm": 1.3773783445358276, "test_inbatch_neg_score": 0.5183354616165161, "test_inbatch_pos_score": 1.3953579664230347, "test_loss": 0.4327126741409302, "test_loss_align": 1.098644495010376, "test_loss_unif": 3.9060373306274414, "test_loss_unif_q@queue": 3.9060370922088623, "test_norm_diff": 0.02373998798429966, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.196208655834198, "test_query_norm": 1.4006659984588623, "test_queue_k_norm": 1.400918960571289, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04207324609160423, "test_stdq": 0.04082862660288811, "test_stdqueue_k": 0.04797288775444031, "test_stdqueue_q": 0.0 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.5825, "doc_norm": 1.4072, "encoder_q-embeddings": 1557.9414, "encoder_q-layer.0": 1030.1917, "encoder_q-layer.1": 1128.1599, "encoder_q-layer.10": 1206.1061, "encoder_q-layer.11": 3149.6265, "encoder_q-layer.2": 1290.6852, "encoder_q-layer.3": 1422.8732, "encoder_q-layer.4": 1465.5587, "encoder_q-layer.5": 1350.97, "encoder_q-layer.6": 1318.5212, "encoder_q-layer.7": 1332.4258, "encoder_q-layer.8": 1378.0172, "encoder_q-layer.9": 1256.0491, "epoch": 0.39, "inbatch_neg_score": 0.2045, "inbatch_pos_score": 0.8306, "learning_rate": 3.327777777777778e-05, "loss": 3.5825, "norm_diff": 0.0704, "norm_loss": 0.0, "num_token_doc": 66.5425, "num_token_overlap": 14.5559, "num_token_query": 37.3621, "num_token_union": 65.2752, "num_word_context": 201.9837, "num_word_doc": 49.6281, "num_word_query": 27.9597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2334.9911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2053, "query_norm": 1.3369, "queue_k_norm": 1.402, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3621, "sent_len_1": 66.5425, "sent_len_max_0": 128.0, "sent_len_max_1": 189.67, "stdk": 0.0481, "stdq": 0.0441, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5847, "doc_norm": 1.4024, "encoder_q-embeddings": 8730.5449, "encoder_q-layer.0": 6697.7754, "encoder_q-layer.1": 7924.0552, "encoder_q-layer.10": 1219.0463, "encoder_q-layer.11": 3092.8999, "encoder_q-layer.2": 8945.2754, "encoder_q-layer.3": 8401.9688, "encoder_q-layer.4": 7018.2061, "encoder_q-layer.5": 5396.8408, "encoder_q-layer.6": 3173.4243, "encoder_q-layer.7": 2264.115, "encoder_q-layer.8": 1539.9175, "encoder_q-layer.9": 1172.6394, "epoch": 0.39, "inbatch_neg_score": 0.201, "inbatch_pos_score": 0.8306, "learning_rate": 3.322222222222222e-05, "loss": 3.5847, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.747, "num_token_overlap": 14.5303, "num_token_query": 37.0977, "num_token_union": 65.248, "num_word_context": 202.0513, "num_word_doc": 49.8016, "num_word_query": 27.7706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9063.9623, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2007, "query_norm": 1.3403, "queue_k_norm": 1.4035, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0977, "sent_len_1": 66.747, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.4737, "stdk": 0.0479, "stdq": 0.0442, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5811, "doc_norm": 1.4057, "encoder_q-embeddings": 1776.3687, "encoder_q-layer.0": 1310.3259, "encoder_q-layer.1": 1368.8192, "encoder_q-layer.10": 1341.355, "encoder_q-layer.11": 3107.9102, "encoder_q-layer.2": 1615.078, "encoder_q-layer.3": 1682.1177, "encoder_q-layer.4": 1779.0304, "encoder_q-layer.5": 1837.6338, "encoder_q-layer.6": 1571.2778, "encoder_q-layer.7": 1279.9639, "encoder_q-layer.8": 1342.6477, "encoder_q-layer.9": 1187.0286, "epoch": 0.39, "inbatch_neg_score": 0.1969, "inbatch_pos_score": 0.8198, "learning_rate": 3.316666666666667e-05, "loss": 3.5811, "norm_diff": 0.0778, "norm_loss": 0.0, "num_token_doc": 66.8058, "num_token_overlap": 14.5408, "num_token_query": 37.2334, "num_token_union": 65.3467, "num_word_context": 202.3722, "num_word_doc": 49.8467, "num_word_query": 27.8595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2526.8612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1969, "query_norm": 1.3278, "queue_k_norm": 1.4045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2334, "sent_len_1": 66.8058, "sent_len_max_0": 128.0, "sent_len_max_1": 191.7875, "stdk": 0.0481, "stdq": 0.0438, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.5629, "doc_norm": 1.4035, "encoder_q-embeddings": 1333.0708, "encoder_q-layer.0": 868.1373, "encoder_q-layer.1": 933.6829, "encoder_q-layer.10": 1210.9502, "encoder_q-layer.11": 2978.9766, "encoder_q-layer.2": 1057.4619, "encoder_q-layer.3": 1101.2556, "encoder_q-layer.4": 1113.9255, "encoder_q-layer.5": 1122.8311, "encoder_q-layer.6": 1144.3674, "encoder_q-layer.7": 1190.2509, "encoder_q-layer.8": 1301.7185, "encoder_q-layer.9": 1158.9991, "epoch": 0.39, "inbatch_neg_score": 0.1842, "inbatch_pos_score": 0.8228, "learning_rate": 3.311111111111112e-05, "loss": 3.5629, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.8811, "num_token_overlap": 14.5838, "num_token_query": 37.4141, "num_token_union": 65.5576, "num_word_context": 202.526, "num_word_doc": 49.9239, "num_word_query": 28.0432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2070.3929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1846, "query_norm": 1.3415, "queue_k_norm": 1.4053, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4141, "sent_len_1": 66.8811, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1362, "stdk": 0.048, "stdq": 0.0449, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.5683, "doc_norm": 1.4064, "encoder_q-embeddings": 2787.2495, "encoder_q-layer.0": 1845.8547, "encoder_q-layer.1": 1866.129, "encoder_q-layer.10": 1382.0865, "encoder_q-layer.11": 3066.2952, "encoder_q-layer.2": 2151.8994, "encoder_q-layer.3": 2295.9624, "encoder_q-layer.4": 2324.479, "encoder_q-layer.5": 2172.0779, "encoder_q-layer.6": 1951.5809, "encoder_q-layer.7": 1887.9004, "encoder_q-layer.8": 1605.7665, "encoder_q-layer.9": 1225.511, "epoch": 0.4, "inbatch_neg_score": 0.1872, "inbatch_pos_score": 0.8223, "learning_rate": 3.3055555555555553e-05, "loss": 3.5683, "norm_diff": 0.0651, "norm_loss": 0.0, "num_token_doc": 66.8992, "num_token_overlap": 14.6752, "num_token_query": 37.4092, "num_token_union": 65.4188, "num_word_context": 202.094, "num_word_doc": 49.9194, "num_word_query": 27.9963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3213.7368, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1871, "query_norm": 1.3414, "queue_k_norm": 1.4032, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4092, "sent_len_1": 66.8992, "sent_len_max_0": 128.0, "sent_len_max_1": 189.245, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5688, "doc_norm": 1.3912, "encoder_q-embeddings": 1226.5941, "encoder_q-layer.0": 827.0621, "encoder_q-layer.1": 852.9568, "encoder_q-layer.10": 1295.7067, "encoder_q-layer.11": 2900.4111, "encoder_q-layer.2": 943.5085, "encoder_q-layer.3": 976.7758, "encoder_q-layer.4": 985.6751, "encoder_q-layer.5": 974.0005, "encoder_q-layer.6": 1053.7281, "encoder_q-layer.7": 1130.7612, "encoder_q-layer.8": 1270.9158, "encoder_q-layer.9": 1169.7021, "epoch": 0.4, "inbatch_neg_score": 0.1875, "inbatch_pos_score": 0.8101, "learning_rate": 3.3e-05, "loss": 3.5688, "norm_diff": 0.0623, "norm_loss": 0.0, "num_token_doc": 66.8178, "num_token_overlap": 14.5644, "num_token_query": 37.2746, "num_token_union": 65.3317, "num_word_context": 202.097, "num_word_doc": 49.7943, "num_word_query": 27.8875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1977.6036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1876, "query_norm": 1.329, "queue_k_norm": 1.4008, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2746, "sent_len_1": 66.8178, "sent_len_max_0": 128.0, "sent_len_max_1": 192.2413, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.5777, "doc_norm": 1.401, "encoder_q-embeddings": 2158.4937, "encoder_q-layer.0": 1406.1643, "encoder_q-layer.1": 1533.9644, "encoder_q-layer.10": 1137.004, "encoder_q-layer.11": 2979.4617, "encoder_q-layer.2": 1866.8136, "encoder_q-layer.3": 1945.8679, "encoder_q-layer.4": 1964.0326, "encoder_q-layer.5": 2461.4373, "encoder_q-layer.6": 1819.9166, "encoder_q-layer.7": 1546.2523, "encoder_q-layer.8": 1333.329, "encoder_q-layer.9": 1145.1135, "epoch": 0.4, "inbatch_neg_score": 0.1771, "inbatch_pos_score": 0.7974, "learning_rate": 3.2944444444444445e-05, "loss": 3.5777, "norm_diff": 0.0707, "norm_loss": 0.0, "num_token_doc": 66.8413, "num_token_overlap": 14.592, "num_token_query": 37.2294, "num_token_union": 65.3549, "num_word_context": 202.2003, "num_word_doc": 49.8753, "num_word_query": 27.8685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2830.4418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1777, "query_norm": 1.3303, "queue_k_norm": 1.4023, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2294, "sent_len_1": 66.8413, "sent_len_max_0": 127.995, "sent_len_max_1": 190.7312, "stdk": 0.048, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5828, "doc_norm": 1.4056, "encoder_q-embeddings": 2144.2634, "encoder_q-layer.0": 1401.225, "encoder_q-layer.1": 1760.9353, "encoder_q-layer.10": 1156.5193, "encoder_q-layer.11": 2941.5339, "encoder_q-layer.2": 1958.1903, "encoder_q-layer.3": 2004.1237, "encoder_q-layer.4": 1939.0754, "encoder_q-layer.5": 1665.8853, "encoder_q-layer.6": 1462.3325, "encoder_q-layer.7": 1344.6008, "encoder_q-layer.8": 1245.2383, "encoder_q-layer.9": 1055.2385, "epoch": 0.4, "inbatch_neg_score": 0.1777, "inbatch_pos_score": 0.7817, "learning_rate": 3.2888888888888894e-05, "loss": 3.5828, "norm_diff": 0.094, "norm_loss": 0.0, "num_token_doc": 67.0432, "num_token_overlap": 14.5778, "num_token_query": 37.3707, "num_token_union": 65.5335, "num_word_context": 202.727, "num_word_doc": 50.0153, "num_word_query": 27.981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2675.5304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1779, "query_norm": 1.3116, "queue_k_norm": 1.3993, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3707, "sent_len_1": 67.0432, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.295, "stdk": 0.0482, "stdq": 0.0432, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.5758, "doc_norm": 1.3984, "encoder_q-embeddings": 4643.96, "encoder_q-layer.0": 3264.8279, "encoder_q-layer.1": 3593.8938, "encoder_q-layer.10": 2355.7595, "encoder_q-layer.11": 5782.7314, "encoder_q-layer.2": 4540.415, "encoder_q-layer.3": 4843.1704, "encoder_q-layer.4": 5413.2373, "encoder_q-layer.5": 4857.3608, "encoder_q-layer.6": 3679.4465, "encoder_q-layer.7": 2900.3025, "encoder_q-layer.8": 2605.5073, "encoder_q-layer.9": 2336.8696, "epoch": 0.4, "inbatch_neg_score": 0.1804, "inbatch_pos_score": 0.7842, "learning_rate": 3.283333333333333e-05, "loss": 3.5758, "norm_diff": 0.063, "norm_loss": 0.0, "num_token_doc": 66.7364, "num_token_overlap": 14.6243, "num_token_query": 37.3782, "num_token_union": 65.341, "num_word_context": 202.2517, "num_word_doc": 49.7933, "num_word_query": 27.9889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6060.4403, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1799, "query_norm": 1.3354, "queue_k_norm": 1.4013, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3782, "sent_len_1": 66.7364, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.9563, "stdk": 0.0479, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5586, "doc_norm": 1.3979, "encoder_q-embeddings": 1386.8247, "encoder_q-layer.0": 961.9778, "encoder_q-layer.1": 1017.0526, "encoder_q-layer.10": 1162.368, "encoder_q-layer.11": 2957.5745, "encoder_q-layer.2": 1131.1678, "encoder_q-layer.3": 1193.5682, "encoder_q-layer.4": 1315.5394, "encoder_q-layer.5": 1286.1801, "encoder_q-layer.6": 1283.7633, "encoder_q-layer.7": 1231.8694, "encoder_q-layer.8": 1236.7274, "encoder_q-layer.9": 1136.8839, "epoch": 0.4, "inbatch_neg_score": 0.1811, "inbatch_pos_score": 0.791, "learning_rate": 3.277777777777778e-05, "loss": 3.5586, "norm_diff": 0.069, "norm_loss": 0.0, "num_token_doc": 66.7038, "num_token_overlap": 14.6066, "num_token_query": 37.4441, "num_token_union": 65.3724, "num_word_context": 202.2051, "num_word_doc": 49.7588, "num_word_query": 28.0347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2139.5411, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.181, "query_norm": 1.3289, "queue_k_norm": 1.3977, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4441, "sent_len_1": 66.7038, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.78, "stdk": 0.0479, "stdq": 0.0436, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.5841, "doc_norm": 1.3974, "encoder_q-embeddings": 1196.1731, "encoder_q-layer.0": 807.483, "encoder_q-layer.1": 831.0496, "encoder_q-layer.10": 1160.96, "encoder_q-layer.11": 2881.3442, "encoder_q-layer.2": 883.767, "encoder_q-layer.3": 890.0236, "encoder_q-layer.4": 922.6573, "encoder_q-layer.5": 905.7778, "encoder_q-layer.6": 972.3203, "encoder_q-layer.7": 1040.4365, "encoder_q-layer.8": 1172.7186, "encoder_q-layer.9": 1093.2094, "epoch": 0.4, "inbatch_neg_score": 0.1807, "inbatch_pos_score": 0.7979, "learning_rate": 3.272222222222223e-05, "loss": 3.5841, "norm_diff": 0.0597, "norm_loss": 0.0, "num_token_doc": 66.5271, "num_token_overlap": 14.5774, "num_token_query": 37.5251, "num_token_union": 65.34, "num_word_context": 202.1755, "num_word_doc": 49.5854, "num_word_query": 28.1156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1932.5264, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1803, "query_norm": 1.3377, "queue_k_norm": 1.3966, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5251, "sent_len_1": 66.5271, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.0275, "stdk": 0.0479, "stdq": 0.0439, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.559, "doc_norm": 1.3976, "encoder_q-embeddings": 1600.5785, "encoder_q-layer.0": 1118.1566, "encoder_q-layer.1": 1215.5997, "encoder_q-layer.10": 1066.7029, "encoder_q-layer.11": 2815.5889, "encoder_q-layer.2": 1565.8574, "encoder_q-layer.3": 1641.62, "encoder_q-layer.4": 1773.7789, "encoder_q-layer.5": 1506.245, "encoder_q-layer.6": 1212.0073, "encoder_q-layer.7": 1211.2162, "encoder_q-layer.8": 1270.1517, "encoder_q-layer.9": 1103.3546, "epoch": 0.4, "inbatch_neg_score": 0.1817, "inbatch_pos_score": 0.8066, "learning_rate": 3.266666666666667e-05, "loss": 3.559, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.75, "num_token_overlap": 14.6038, "num_token_query": 37.3475, "num_token_union": 65.3572, "num_word_context": 202.1475, "num_word_doc": 49.7963, "num_word_query": 27.9498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2332.2678, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1809, "query_norm": 1.3419, "queue_k_norm": 1.3996, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3475, "sent_len_1": 66.75, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.6625, "stdk": 0.0479, "stdq": 0.0439, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.5896, "doc_norm": 1.4023, "encoder_q-embeddings": 1281.3655, "encoder_q-layer.0": 859.3624, "encoder_q-layer.1": 943.8941, "encoder_q-layer.10": 1303.915, "encoder_q-layer.11": 3061.6123, "encoder_q-layer.2": 1001.2101, "encoder_q-layer.3": 1046.2792, "encoder_q-layer.4": 1041.4581, "encoder_q-layer.5": 1078.0443, "encoder_q-layer.6": 1151.7406, "encoder_q-layer.7": 1241.8361, "encoder_q-layer.8": 1375.4104, "encoder_q-layer.9": 1176.2512, "epoch": 0.4, "inbatch_neg_score": 0.1906, "inbatch_pos_score": 0.7871, "learning_rate": 3.261111111111111e-05, "loss": 3.5896, "norm_diff": 0.0505, "norm_loss": 0.0, "num_token_doc": 66.8227, "num_token_overlap": 14.613, "num_token_query": 37.3165, "num_token_union": 65.4209, "num_word_context": 202.358, "num_word_doc": 49.8749, "num_word_query": 27.9265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.4868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1901, "query_norm": 1.3517, "queue_k_norm": 1.3988, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3165, "sent_len_1": 66.8227, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8487, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.5449, "doc_norm": 1.3951, "encoder_q-embeddings": 11773.1182, "encoder_q-layer.0": 8472.2109, "encoder_q-layer.1": 8474.5869, "encoder_q-layer.10": 1265.9907, "encoder_q-layer.11": 3102.9182, "encoder_q-layer.2": 9156.2568, "encoder_q-layer.3": 9060.2061, "encoder_q-layer.4": 9003.6973, "encoder_q-layer.5": 8333.9727, "encoder_q-layer.6": 8664.9121, "encoder_q-layer.7": 6736.0142, "encoder_q-layer.8": 3486.4329, "encoder_q-layer.9": 1547.0609, "epoch": 0.4, "inbatch_neg_score": 0.1996, "inbatch_pos_score": 0.8154, "learning_rate": 3.2555555555555555e-05, "loss": 3.5449, "norm_diff": 0.0258, "norm_loss": 0.0, "num_token_doc": 67.0362, "num_token_overlap": 14.6459, "num_token_query": 37.497, "num_token_union": 65.663, "num_word_context": 202.8307, "num_word_doc": 50.0256, "num_word_query": 28.1221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11819.896, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1995, "query_norm": 1.3693, "queue_k_norm": 1.3982, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.497, "sent_len_1": 67.0362, "sent_len_max_0": 128.0, "sent_len_max_1": 191.085, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.5809, "doc_norm": 1.3975, "encoder_q-embeddings": 1656.463, "encoder_q-layer.0": 1134.0553, "encoder_q-layer.1": 1233.6418, "encoder_q-layer.10": 1268.8268, "encoder_q-layer.11": 3096.623, "encoder_q-layer.2": 1526.0668, "encoder_q-layer.3": 1688.1243, "encoder_q-layer.4": 1860.6309, "encoder_q-layer.5": 1949.4283, "encoder_q-layer.6": 2370.6536, "encoder_q-layer.7": 2204.8833, "encoder_q-layer.8": 1733.3329, "encoder_q-layer.9": 1292.3485, "epoch": 0.41, "inbatch_neg_score": 0.1991, "inbatch_pos_score": 0.7959, "learning_rate": 3.2500000000000004e-05, "loss": 3.5809, "norm_diff": 0.0709, "norm_loss": 0.0, "num_token_doc": 66.8176, "num_token_overlap": 14.5639, "num_token_query": 37.2689, "num_token_union": 65.3932, "num_word_context": 202.5167, "num_word_doc": 49.8758, "num_word_query": 27.897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2736.6809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1992, "query_norm": 1.3266, "queue_k_norm": 1.4004, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2689, "sent_len_1": 66.8176, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.0913, "stdk": 0.0479, "stdq": 0.0432, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.5678, "doc_norm": 1.3943, "encoder_q-embeddings": 1460.4728, "encoder_q-layer.0": 1026.6289, "encoder_q-layer.1": 1051.6456, "encoder_q-layer.10": 1137.2946, "encoder_q-layer.11": 3139.2754, "encoder_q-layer.2": 1219.3828, "encoder_q-layer.3": 1182.4373, "encoder_q-layer.4": 1199.9268, "encoder_q-layer.5": 1261.7809, "encoder_q-layer.6": 1157.1587, "encoder_q-layer.7": 1136.9279, "encoder_q-layer.8": 1268.7395, "encoder_q-layer.9": 1175.2078, "epoch": 0.41, "inbatch_neg_score": 0.2038, "inbatch_pos_score": 0.8296, "learning_rate": 3.2444444444444446e-05, "loss": 3.5678, "norm_diff": 0.0523, "norm_loss": 0.0, "num_token_doc": 66.8601, "num_token_overlap": 14.6472, "num_token_query": 37.4353, "num_token_union": 65.3958, "num_word_context": 202.3456, "num_word_doc": 49.8815, "num_word_query": 28.0362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2191.6293, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2043, "query_norm": 1.3419, "queue_k_norm": 1.3999, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4353, "sent_len_1": 66.8601, "sent_len_max_0": 128.0, "sent_len_max_1": 190.645, "stdk": 0.0478, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.5681, "doc_norm": 1.4023, "encoder_q-embeddings": 1313.6732, "encoder_q-layer.0": 894.9566, "encoder_q-layer.1": 971.6074, "encoder_q-layer.10": 1108.3618, "encoder_q-layer.11": 2879.575, "encoder_q-layer.2": 1040.1548, "encoder_q-layer.3": 1109.3392, "encoder_q-layer.4": 1205.8322, "encoder_q-layer.5": 1094.1594, "encoder_q-layer.6": 1097.5551, "encoder_q-layer.7": 1173.8914, "encoder_q-layer.8": 1211.7123, "encoder_q-layer.9": 1078.0765, "epoch": 0.41, "inbatch_neg_score": 0.1983, "inbatch_pos_score": 0.8159, "learning_rate": 3.238888888888889e-05, "loss": 3.5681, "norm_diff": 0.0758, "norm_loss": 0.0, "num_token_doc": 66.475, "num_token_overlap": 14.5402, "num_token_query": 37.1447, "num_token_union": 65.121, "num_word_context": 201.9378, "num_word_doc": 49.6201, "num_word_query": 27.7864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2029.4336, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2, "query_norm": 1.3266, "queue_k_norm": 1.4004, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1447, "sent_len_1": 66.475, "sent_len_max_0": 128.0, "sent_len_max_1": 189.46, "stdk": 0.0481, "stdq": 0.0438, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.5737, "doc_norm": 1.3987, "encoder_q-embeddings": 2783.167, "encoder_q-layer.0": 1717.2781, "encoder_q-layer.1": 1830.6747, "encoder_q-layer.10": 1282.7078, "encoder_q-layer.11": 3226.2761, "encoder_q-layer.2": 1862.8983, "encoder_q-layer.3": 1920.3882, "encoder_q-layer.4": 2147.2883, "encoder_q-layer.5": 1942.2384, "encoder_q-layer.6": 1823.3405, "encoder_q-layer.7": 1961.2903, "encoder_q-layer.8": 1579.915, "encoder_q-layer.9": 1179.9973, "epoch": 0.41, "inbatch_neg_score": 0.2044, "inbatch_pos_score": 0.8066, "learning_rate": 3.233333333333333e-05, "loss": 3.5737, "norm_diff": 0.0692, "norm_loss": 0.0, "num_token_doc": 66.6472, "num_token_overlap": 14.5225, "num_token_query": 37.2792, "num_token_union": 65.3405, "num_word_context": 202.3291, "num_word_doc": 49.7039, "num_word_query": 27.8976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3123.6579, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2037, "query_norm": 1.3294, "queue_k_norm": 1.4002, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2792, "sent_len_1": 66.6472, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.3475, "stdk": 0.0479, "stdq": 0.0439, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5565, "doc_norm": 1.4051, "encoder_q-embeddings": 3586.291, "encoder_q-layer.0": 2927.2466, "encoder_q-layer.1": 2843.8425, "encoder_q-layer.10": 1142.2354, "encoder_q-layer.11": 3064.29, "encoder_q-layer.2": 3406.2488, "encoder_q-layer.3": 3310.1318, "encoder_q-layer.4": 3441.2764, "encoder_q-layer.5": 2995.8142, "encoder_q-layer.6": 3347.3855, "encoder_q-layer.7": 3057.2483, "encoder_q-layer.8": 2402.1973, "encoder_q-layer.9": 1240.709, "epoch": 0.41, "inbatch_neg_score": 0.2023, "inbatch_pos_score": 0.8164, "learning_rate": 3.227777777777778e-05, "loss": 3.5565, "norm_diff": 0.0805, "norm_loss": 0.0, "num_token_doc": 66.6227, "num_token_overlap": 14.6791, "num_token_query": 37.6382, "num_token_union": 65.3949, "num_word_context": 202.443, "num_word_doc": 49.7099, "num_word_query": 28.1802, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4481.3575, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2012, "query_norm": 1.3246, "queue_k_norm": 1.4016, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.6382, "sent_len_1": 66.6227, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1012, "stdk": 0.0481, "stdq": 0.0438, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.5838, "doc_norm": 1.3956, "encoder_q-embeddings": 2846.4878, "encoder_q-layer.0": 2143.3042, "encoder_q-layer.1": 2076.1694, "encoder_q-layer.10": 1084.2457, "encoder_q-layer.11": 2911.1448, "encoder_q-layer.2": 2400.2917, "encoder_q-layer.3": 2355.6921, "encoder_q-layer.4": 2240.6711, "encoder_q-layer.5": 2488.5044, "encoder_q-layer.6": 1969.8824, "encoder_q-layer.7": 1977.1887, "encoder_q-layer.8": 1477.5157, "encoder_q-layer.9": 1081.4933, "epoch": 0.41, "inbatch_neg_score": 0.196, "inbatch_pos_score": 0.8408, "learning_rate": 3.222222222222223e-05, "loss": 3.5838, "norm_diff": 0.0755, "norm_loss": 0.0, "num_token_doc": 66.6523, "num_token_overlap": 14.5523, "num_token_query": 37.2303, "num_token_union": 65.2841, "num_word_context": 202.0219, "num_word_doc": 49.7179, "num_word_query": 27.8652, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3283.5562, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1959, "query_norm": 1.3201, "queue_k_norm": 1.4027, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2303, "sent_len_1": 66.6523, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6887, "stdk": 0.0478, "stdq": 0.0438, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.5556, "doc_norm": 1.4026, "encoder_q-embeddings": 1265.912, "encoder_q-layer.0": 845.9142, "encoder_q-layer.1": 909.7163, "encoder_q-layer.10": 1321.3413, "encoder_q-layer.11": 2940.3716, "encoder_q-layer.2": 1041.2205, "encoder_q-layer.3": 1065.7693, "encoder_q-layer.4": 1154.2804, "encoder_q-layer.5": 1179.3834, "encoder_q-layer.6": 1264.9362, "encoder_q-layer.7": 1267.4592, "encoder_q-layer.8": 1347.3618, "encoder_q-layer.9": 1178.2354, "epoch": 0.41, "inbatch_neg_score": 0.1971, "inbatch_pos_score": 0.8442, "learning_rate": 3.2166666666666665e-05, "loss": 3.5556, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.8949, "num_token_overlap": 14.577, "num_token_query": 37.2473, "num_token_union": 65.3754, "num_word_context": 202.5446, "num_word_doc": 49.8969, "num_word_query": 27.9013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.9224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.198, "query_norm": 1.3477, "queue_k_norm": 1.4028, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2473, "sent_len_1": 66.8949, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.8688, "stdk": 0.048, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5583, "doc_norm": 1.3972, "encoder_q-embeddings": 2110.386, "encoder_q-layer.0": 1580.9785, "encoder_q-layer.1": 1380.444, "encoder_q-layer.10": 1145.4232, "encoder_q-layer.11": 3079.8186, "encoder_q-layer.2": 1507.0198, "encoder_q-layer.3": 1487.0326, "encoder_q-layer.4": 1454.238, "encoder_q-layer.5": 1384.0846, "encoder_q-layer.6": 1356.5181, "encoder_q-layer.7": 1372.9727, "encoder_q-layer.8": 1270.8945, "encoder_q-layer.9": 1112.5757, "epoch": 0.41, "inbatch_neg_score": 0.1935, "inbatch_pos_score": 0.79, "learning_rate": 3.2111111111111114e-05, "loss": 3.5583, "norm_diff": 0.0834, "norm_loss": 0.0, "num_token_doc": 66.7743, "num_token_overlap": 14.6395, "num_token_query": 37.4663, "num_token_union": 65.4136, "num_word_context": 202.0441, "num_word_doc": 49.8406, "num_word_query": 28.057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2520.1694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1929, "query_norm": 1.3138, "queue_k_norm": 1.4023, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4663, "sent_len_1": 66.7743, "sent_len_max_0": 127.995, "sent_len_max_1": 189.2113, "stdk": 0.0478, "stdq": 0.0434, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5786, "doc_norm": 1.4035, "encoder_q-embeddings": 1378.777, "encoder_q-layer.0": 931.7532, "encoder_q-layer.1": 971.7145, "encoder_q-layer.10": 1244.6667, "encoder_q-layer.11": 2886.355, "encoder_q-layer.2": 1056.0215, "encoder_q-layer.3": 1071.2406, "encoder_q-layer.4": 1119.5056, "encoder_q-layer.5": 1103.1959, "encoder_q-layer.6": 1183.2128, "encoder_q-layer.7": 1243.1354, "encoder_q-layer.8": 1270.6698, "encoder_q-layer.9": 1179.7074, "epoch": 0.41, "inbatch_neg_score": 0.191, "inbatch_pos_score": 0.8252, "learning_rate": 3.2055555555555556e-05, "loss": 3.5786, "norm_diff": 0.0707, "norm_loss": 0.0, "num_token_doc": 66.5272, "num_token_overlap": 14.6244, "num_token_query": 37.3709, "num_token_union": 65.2183, "num_word_context": 201.9497, "num_word_doc": 49.6624, "num_word_query": 27.9801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2035.5186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1915, "query_norm": 1.3329, "queue_k_norm": 1.4011, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3709, "sent_len_1": 66.5272, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.6562, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.5666, "doc_norm": 1.3989, "encoder_q-embeddings": 2264.6899, "encoder_q-layer.0": 1608.5088, "encoder_q-layer.1": 1729.8075, "encoder_q-layer.10": 1107.4183, "encoder_q-layer.11": 2993.7371, "encoder_q-layer.2": 2086.8423, "encoder_q-layer.3": 2184.7559, "encoder_q-layer.4": 2225.8945, "encoder_q-layer.5": 1997.5149, "encoder_q-layer.6": 1916.0386, "encoder_q-layer.7": 1790.2144, "encoder_q-layer.8": 1395.5262, "encoder_q-layer.9": 1137.4547, "epoch": 0.41, "inbatch_neg_score": 0.1942, "inbatch_pos_score": 0.8247, "learning_rate": 3.2000000000000005e-05, "loss": 3.5666, "norm_diff": 0.0544, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 14.6243, "num_token_query": 37.5596, "num_token_union": 65.5373, "num_word_context": 202.3792, "num_word_doc": 49.8208, "num_word_query": 28.144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2945.3108, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1953, "query_norm": 1.3445, "queue_k_norm": 1.4019, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5596, "sent_len_1": 66.8218, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.1637, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.5609, "doc_norm": 1.404, "encoder_q-embeddings": 2078.696, "encoder_q-layer.0": 1491.3075, "encoder_q-layer.1": 1513.5088, "encoder_q-layer.10": 1375.7548, "encoder_q-layer.11": 3050.7131, "encoder_q-layer.2": 1774.7523, "encoder_q-layer.3": 1805.6396, "encoder_q-layer.4": 1856.6444, "encoder_q-layer.5": 1735.5076, "encoder_q-layer.6": 1727.9196, "encoder_q-layer.7": 1666.1093, "encoder_q-layer.8": 1648.6071, "encoder_q-layer.9": 1275.0333, "epoch": 0.41, "inbatch_neg_score": 0.1953, "inbatch_pos_score": 0.8149, "learning_rate": 3.194444444444444e-05, "loss": 3.5609, "norm_diff": 0.0576, "norm_loss": 0.0, "num_token_doc": 66.6443, "num_token_overlap": 14.6255, "num_token_query": 37.3848, "num_token_union": 65.3224, "num_word_context": 202.0829, "num_word_doc": 49.7203, "num_word_query": 28.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2757.1947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1946, "query_norm": 1.3464, "queue_k_norm": 1.4011, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3848, "sent_len_1": 66.6443, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4363, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.5508, "doc_norm": 1.4034, "encoder_q-embeddings": 1514.0544, "encoder_q-layer.0": 1055.4185, "encoder_q-layer.1": 1139.8695, "encoder_q-layer.10": 1260.0625, "encoder_q-layer.11": 3125.3506, "encoder_q-layer.2": 1238.7279, "encoder_q-layer.3": 1209.7579, "encoder_q-layer.4": 1234.7693, "encoder_q-layer.5": 1169.2455, "encoder_q-layer.6": 1276.1868, "encoder_q-layer.7": 1383.7175, "encoder_q-layer.8": 1392.5179, "encoder_q-layer.9": 1168.0759, "epoch": 0.42, "inbatch_neg_score": 0.1939, "inbatch_pos_score": 0.8369, "learning_rate": 3.188888888888889e-05, "loss": 3.5508, "norm_diff": 0.053, "norm_loss": 0.0, "num_token_doc": 67.0046, "num_token_overlap": 14.622, "num_token_query": 37.466, "num_token_union": 65.6289, "num_word_context": 202.6953, "num_word_doc": 50.0308, "num_word_query": 28.0634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2247.8603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1948, "query_norm": 1.3505, "queue_k_norm": 1.4019, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.466, "sent_len_1": 67.0046, "sent_len_max_0": 127.99, "sent_len_max_1": 188.5962, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.5809, "doc_norm": 1.3988, "encoder_q-embeddings": 1340.1787, "encoder_q-layer.0": 923.7081, "encoder_q-layer.1": 948.8245, "encoder_q-layer.10": 1081.1694, "encoder_q-layer.11": 2904.446, "encoder_q-layer.2": 1062.1855, "encoder_q-layer.3": 1056.3652, "encoder_q-layer.4": 1086.4377, "encoder_q-layer.5": 1081.812, "encoder_q-layer.6": 1097.7728, "encoder_q-layer.7": 1124.7793, "encoder_q-layer.8": 1297.1165, "encoder_q-layer.9": 1115.806, "epoch": 0.42, "inbatch_neg_score": 0.1935, "inbatch_pos_score": 0.8174, "learning_rate": 3.183333333333334e-05, "loss": 3.5809, "norm_diff": 0.0466, "norm_loss": 0.0, "num_token_doc": 66.5863, "num_token_overlap": 14.5498, "num_token_query": 37.2262, "num_token_union": 65.1867, "num_word_context": 201.9535, "num_word_doc": 49.6277, "num_word_query": 27.845, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2046.0198, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1935, "query_norm": 1.3522, "queue_k_norm": 1.401, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2262, "sent_len_1": 66.5863, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4325, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.5603, "doc_norm": 1.3959, "encoder_q-embeddings": 6060.1621, "encoder_q-layer.0": 4293.8999, "encoder_q-layer.1": 4585.9507, "encoder_q-layer.10": 1204.8834, "encoder_q-layer.11": 3005.1108, "encoder_q-layer.2": 5313.5625, "encoder_q-layer.3": 5409.9307, "encoder_q-layer.4": 5609.6499, "encoder_q-layer.5": 5511.1641, "encoder_q-layer.6": 4529.7671, "encoder_q-layer.7": 3551.0154, "encoder_q-layer.8": 2729.429, "encoder_q-layer.9": 1329.0696, "epoch": 0.42, "inbatch_neg_score": 0.1912, "inbatch_pos_score": 0.7793, "learning_rate": 3.177777777777778e-05, "loss": 3.5603, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.8357, "num_token_overlap": 14.5593, "num_token_query": 37.3278, "num_token_union": 65.4283, "num_word_context": 202.428, "num_word_doc": 49.8635, "num_word_query": 27.9434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6766.4678, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1913, "query_norm": 1.3375, "queue_k_norm": 1.4007, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3278, "sent_len_1": 66.8357, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8688, "stdk": 0.0478, "stdq": 0.0439, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5568, "doc_norm": 1.4032, "encoder_q-embeddings": 3812.1133, "encoder_q-layer.0": 2549.4666, "encoder_q-layer.1": 2627.5164, "encoder_q-layer.10": 1149.621, "encoder_q-layer.11": 2958.0425, "encoder_q-layer.2": 3134.1492, "encoder_q-layer.3": 3380.9631, "encoder_q-layer.4": 3795.9529, "encoder_q-layer.5": 3625.7954, "encoder_q-layer.6": 2654.0483, "encoder_q-layer.7": 2017.9362, "encoder_q-layer.8": 1506.2708, "encoder_q-layer.9": 1184.4069, "epoch": 0.42, "inbatch_neg_score": 0.1918, "inbatch_pos_score": 0.8071, "learning_rate": 3.1722222222222224e-05, "loss": 3.5568, "norm_diff": 0.0744, "norm_loss": 0.0, "num_token_doc": 66.6756, "num_token_overlap": 14.5726, "num_token_query": 37.2388, "num_token_union": 65.2814, "num_word_context": 202.0633, "num_word_doc": 49.7521, "num_word_query": 27.8781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4256.4117, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1906, "query_norm": 1.3288, "queue_k_norm": 1.4021, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2388, "sent_len_1": 66.6756, "sent_len_max_0": 127.995, "sent_len_max_1": 188.7012, "stdk": 0.0481, "stdq": 0.0434, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5262, "doc_norm": 1.4051, "encoder_q-embeddings": 7494.8247, "encoder_q-layer.0": 5168.478, "encoder_q-layer.1": 5572.8159, "encoder_q-layer.10": 2462.7456, "encoder_q-layer.11": 5619.5283, "encoder_q-layer.2": 6421.8662, "encoder_q-layer.3": 6709.5864, "encoder_q-layer.4": 7002.7295, "encoder_q-layer.5": 6258.5366, "encoder_q-layer.6": 5924.2305, "encoder_q-layer.7": 6068.5005, "encoder_q-layer.8": 5518.9849, "encoder_q-layer.9": 3261.0906, "epoch": 0.42, "inbatch_neg_score": 0.1899, "inbatch_pos_score": 0.8242, "learning_rate": 3.1666666666666666e-05, "loss": 3.5262, "norm_diff": 0.0486, "norm_loss": 0.0, "num_token_doc": 67.0523, "num_token_overlap": 14.6427, "num_token_query": 37.2587, "num_token_union": 65.4291, "num_word_context": 202.3075, "num_word_doc": 49.988, "num_word_query": 27.8755, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8783.4529, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1901, "query_norm": 1.3565, "queue_k_norm": 1.4011, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2587, "sent_len_1": 67.0523, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.6962, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.5327, "doc_norm": 1.4004, "encoder_q-embeddings": 2217.23, "encoder_q-layer.0": 1511.3789, "encoder_q-layer.1": 1613.1116, "encoder_q-layer.10": 2099.7139, "encoder_q-layer.11": 5604.2676, "encoder_q-layer.2": 1754.5552, "encoder_q-layer.3": 1918.8546, "encoder_q-layer.4": 1984.7292, "encoder_q-layer.5": 1885.606, "encoder_q-layer.6": 1890.6193, "encoder_q-layer.7": 2078.9866, "encoder_q-layer.8": 2377.5642, "encoder_q-layer.9": 2218.2302, "epoch": 0.42, "inbatch_neg_score": 0.1939, "inbatch_pos_score": 0.8135, "learning_rate": 3.1611111111111115e-05, "loss": 3.5327, "norm_diff": 0.0514, "norm_loss": 0.0, "num_token_doc": 66.9083, "num_token_overlap": 14.6286, "num_token_query": 37.4626, "num_token_union": 65.4843, "num_word_context": 202.2021, "num_word_doc": 49.9055, "num_word_query": 28.0526, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3775.8721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1931, "query_norm": 1.3491, "queue_k_norm": 1.4018, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4626, "sent_len_1": 66.9083, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.8162, "stdk": 0.0479, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.5547, "doc_norm": 1.4037, "encoder_q-embeddings": 3963.3438, "encoder_q-layer.0": 3121.4092, "encoder_q-layer.1": 3278.1343, "encoder_q-layer.10": 1456.0798, "encoder_q-layer.11": 3121.3433, "encoder_q-layer.2": 3795.1194, "encoder_q-layer.3": 3451.9595, "encoder_q-layer.4": 3763.3887, "encoder_q-layer.5": 3499.9905, "encoder_q-layer.6": 2427.9414, "encoder_q-layer.7": 1879.6704, "encoder_q-layer.8": 1509.8225, "encoder_q-layer.9": 1198.3149, "epoch": 0.42, "inbatch_neg_score": 0.2004, "inbatch_pos_score": 0.8335, "learning_rate": 3.155555555555556e-05, "loss": 3.5547, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.9317, "num_token_overlap": 14.6252, "num_token_query": 37.4952, "num_token_union": 65.508, "num_word_context": 202.4427, "num_word_doc": 49.9335, "num_word_query": 28.0857, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4530.646, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1993, "query_norm": 1.367, "queue_k_norm": 1.4007, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4952, "sent_len_1": 66.9317, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.2175, "stdk": 0.0481, "stdq": 0.0447, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5527, "doc_norm": 1.3988, "encoder_q-embeddings": 3280.9678, "encoder_q-layer.0": 2252.0295, "encoder_q-layer.1": 2605.7065, "encoder_q-layer.10": 1232.3615, "encoder_q-layer.11": 3082.6826, "encoder_q-layer.2": 2869.0378, "encoder_q-layer.3": 3314.1006, "encoder_q-layer.4": 3959.0339, "encoder_q-layer.5": 3846.0964, "encoder_q-layer.6": 2478.3872, "encoder_q-layer.7": 1684.2275, "encoder_q-layer.8": 1439.4979, "encoder_q-layer.9": 1185.8406, "epoch": 0.42, "inbatch_neg_score": 0.1982, "inbatch_pos_score": 0.8232, "learning_rate": 3.15e-05, "loss": 3.5527, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.936, "num_token_overlap": 14.663, "num_token_query": 37.4888, "num_token_union": 65.5188, "num_word_context": 202.2267, "num_word_doc": 49.9815, "num_word_query": 28.0705, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3999.4043, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1975, "query_norm": 1.3399, "queue_k_norm": 1.4014, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4888, "sent_len_1": 66.936, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.0075, "stdk": 0.0479, "stdq": 0.044, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5404, "doc_norm": 1.4019, "encoder_q-embeddings": 6813.5215, "encoder_q-layer.0": 5041.8022, "encoder_q-layer.1": 5374.376, "encoder_q-layer.10": 1217.948, "encoder_q-layer.11": 2977.9292, "encoder_q-layer.2": 6528.439, "encoder_q-layer.3": 6076.8301, "encoder_q-layer.4": 6489.4614, "encoder_q-layer.5": 6523.0522, "encoder_q-layer.6": 5741.791, "encoder_q-layer.7": 4632.3394, "encoder_q-layer.8": 2889.8101, "encoder_q-layer.9": 1298.3087, "epoch": 0.42, "inbatch_neg_score": 0.2019, "inbatch_pos_score": 0.8315, "learning_rate": 3.144444444444445e-05, "loss": 3.5404, "norm_diff": 0.0585, "norm_loss": 0.0, "num_token_doc": 66.9035, "num_token_overlap": 14.6106, "num_token_query": 37.359, "num_token_union": 65.4738, "num_word_context": 202.3589, "num_word_doc": 49.9497, "num_word_query": 27.9739, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7734.5947, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1997, "query_norm": 1.3434, "queue_k_norm": 1.4025, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.359, "sent_len_1": 66.9035, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.2738, "stdk": 0.048, "stdq": 0.0443, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.5697, "doc_norm": 1.3939, "encoder_q-embeddings": 1961.6696, "encoder_q-layer.0": 1389.3491, "encoder_q-layer.1": 1614.8948, "encoder_q-layer.10": 1175.7482, "encoder_q-layer.11": 3069.9041, "encoder_q-layer.2": 1794.6647, "encoder_q-layer.3": 1939.4888, "encoder_q-layer.4": 2105.8369, "encoder_q-layer.5": 1904.22, "encoder_q-layer.6": 1613.2322, "encoder_q-layer.7": 1494.8423, "encoder_q-layer.8": 1452.8165, "encoder_q-layer.9": 1274.425, "epoch": 0.42, "inbatch_neg_score": 0.2021, "inbatch_pos_score": 0.7969, "learning_rate": 3.138888888888889e-05, "loss": 3.5697, "norm_diff": 0.064, "norm_loss": 0.0, "num_token_doc": 66.8062, "num_token_overlap": 14.5548, "num_token_query": 37.1841, "num_token_union": 65.3673, "num_word_context": 202.4167, "num_word_doc": 49.8743, "num_word_query": 27.8299, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2743.4933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2026, "query_norm": 1.3299, "queue_k_norm": 1.4043, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1841, "sent_len_1": 66.8062, "sent_len_max_0": 127.99, "sent_len_max_1": 189.5788, "stdk": 0.0477, "stdq": 0.0439, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.5635, "doc_norm": 1.407, "encoder_q-embeddings": 4254.293, "encoder_q-layer.0": 2932.0938, "encoder_q-layer.1": 3261.0979, "encoder_q-layer.10": 1282.9844, "encoder_q-layer.11": 2997.3125, "encoder_q-layer.2": 3861.5525, "encoder_q-layer.3": 4395.02, "encoder_q-layer.4": 4578.3501, "encoder_q-layer.5": 4186.2607, "encoder_q-layer.6": 2731.6272, "encoder_q-layer.7": 1928.7861, "encoder_q-layer.8": 1423.8015, "encoder_q-layer.9": 1110.9402, "epoch": 0.43, "inbatch_neg_score": 0.2057, "inbatch_pos_score": 0.8369, "learning_rate": 3.1333333333333334e-05, "loss": 3.5635, "norm_diff": 0.0694, "norm_loss": 0.0, "num_token_doc": 66.4332, "num_token_overlap": 14.502, "num_token_query": 37.23, "num_token_union": 65.1975, "num_word_context": 201.7714, "num_word_doc": 49.5878, "num_word_query": 27.8845, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4867.2265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2061, "query_norm": 1.3376, "queue_k_norm": 1.4009, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.23, "sent_len_1": 66.4332, "sent_len_max_0": 128.0, "sent_len_max_1": 187.61, "stdk": 0.0482, "stdq": 0.0442, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.5562, "doc_norm": 1.4028, "encoder_q-embeddings": 1223.0582, "encoder_q-layer.0": 838.8381, "encoder_q-layer.1": 866.026, "encoder_q-layer.10": 1118.7196, "encoder_q-layer.11": 2989.7058, "encoder_q-layer.2": 953.3455, "encoder_q-layer.3": 999.0427, "encoder_q-layer.4": 1055.7262, "encoder_q-layer.5": 1053.689, "encoder_q-layer.6": 1087.8856, "encoder_q-layer.7": 1205.0946, "encoder_q-layer.8": 1229.8695, "encoder_q-layer.9": 1138.1519, "epoch": 0.43, "inbatch_neg_score": 0.2021, "inbatch_pos_score": 0.8145, "learning_rate": 3.1277777777777776e-05, "loss": 3.5562, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 66.7473, "num_token_overlap": 14.6107, "num_token_query": 37.3534, "num_token_union": 65.3057, "num_word_context": 202.4951, "num_word_doc": 49.8148, "num_word_query": 27.9797, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2034.5395, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2013, "query_norm": 1.3162, "queue_k_norm": 1.4041, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3534, "sent_len_1": 66.7473, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0737, "stdk": 0.048, "stdq": 0.0438, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.5402, "doc_norm": 1.3959, "encoder_q-embeddings": 2480.9116, "encoder_q-layer.0": 1781.1272, "encoder_q-layer.1": 1959.8806, "encoder_q-layer.10": 1217.7303, "encoder_q-layer.11": 3043.2158, "encoder_q-layer.2": 2265.6653, "encoder_q-layer.3": 2336.374, "encoder_q-layer.4": 2533.5044, "encoder_q-layer.5": 2453.0935, "encoder_q-layer.6": 1705.5206, "encoder_q-layer.7": 1424.4166, "encoder_q-layer.8": 1360.2622, "encoder_q-layer.9": 1173.809, "epoch": 0.43, "inbatch_neg_score": 0.1982, "inbatch_pos_score": 0.8027, "learning_rate": 3.1222222222222225e-05, "loss": 3.5402, "norm_diff": 0.0865, "norm_loss": 0.0, "num_token_doc": 66.7735, "num_token_overlap": 14.552, "num_token_query": 37.2418, "num_token_union": 65.3717, "num_word_context": 202.1657, "num_word_doc": 49.8114, "num_word_query": 27.8876, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3093.5678, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1985, "query_norm": 1.3093, "queue_k_norm": 1.4043, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2418, "sent_len_1": 66.7735, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.5613, "stdk": 0.0478, "stdq": 0.0434, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5419, "doc_norm": 1.4036, "encoder_q-embeddings": 1177.8307, "encoder_q-layer.0": 774.2854, "encoder_q-layer.1": 796.3087, "encoder_q-layer.10": 1496.6266, "encoder_q-layer.11": 2924.7021, "encoder_q-layer.2": 924.1088, "encoder_q-layer.3": 975.2263, "encoder_q-layer.4": 1055.5347, "encoder_q-layer.5": 1013.8916, "encoder_q-layer.6": 1060.4761, "encoder_q-layer.7": 1121.0944, "encoder_q-layer.8": 1322.6257, "encoder_q-layer.9": 1214.2516, "epoch": 0.43, "inbatch_neg_score": 0.1981, "inbatch_pos_score": 0.8472, "learning_rate": 3.116666666666667e-05, "loss": 3.5419, "norm_diff": 0.0536, "norm_loss": 0.0, "num_token_doc": 66.6539, "num_token_overlap": 14.6331, "num_token_query": 37.4308, "num_token_union": 65.3415, "num_word_context": 202.2914, "num_word_doc": 49.7431, "num_word_query": 28.0291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1933.4345, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1995, "query_norm": 1.3511, "queue_k_norm": 1.4036, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4308, "sent_len_1": 66.6539, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.5263, "stdk": 0.0481, "stdq": 0.0448, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5329, "doc_norm": 1.406, "encoder_q-embeddings": 1291.7708, "encoder_q-layer.0": 891.094, "encoder_q-layer.1": 957.6248, "encoder_q-layer.10": 1127.3347, "encoder_q-layer.11": 2871.9534, "encoder_q-layer.2": 1113.0449, "encoder_q-layer.3": 1049.0219, "encoder_q-layer.4": 1135.4894, "encoder_q-layer.5": 1033.8197, "encoder_q-layer.6": 1042.0973, "encoder_q-layer.7": 1133.3893, "encoder_q-layer.8": 1301.1665, "encoder_q-layer.9": 1152.0803, "epoch": 0.43, "inbatch_neg_score": 0.1975, "inbatch_pos_score": 0.812, "learning_rate": 3.111111111111111e-05, "loss": 3.5329, "norm_diff": 0.0718, "norm_loss": 0.0, "num_token_doc": 66.8085, "num_token_overlap": 14.6116, "num_token_query": 37.3611, "num_token_union": 65.4042, "num_word_context": 202.1823, "num_word_doc": 49.8214, "num_word_query": 27.9723, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2043.6369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1964, "query_norm": 1.3342, "queue_k_norm": 1.4049, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3611, "sent_len_1": 66.8085, "sent_len_max_0": 127.995, "sent_len_max_1": 191.6463, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.555, "doc_norm": 1.407, "encoder_q-embeddings": 1194.951, "encoder_q-layer.0": 773.2673, "encoder_q-layer.1": 793.4022, "encoder_q-layer.10": 1166.9589, "encoder_q-layer.11": 3020.8474, "encoder_q-layer.2": 914.0208, "encoder_q-layer.3": 978.9176, "encoder_q-layer.4": 993.9659, "encoder_q-layer.5": 976.0695, "encoder_q-layer.6": 1102.0164, "encoder_q-layer.7": 1117.4001, "encoder_q-layer.8": 1304.6823, "encoder_q-layer.9": 1210.4039, "epoch": 0.43, "inbatch_neg_score": 0.1936, "inbatch_pos_score": 0.8027, "learning_rate": 3.105555555555555e-05, "loss": 3.555, "norm_diff": 0.083, "norm_loss": 0.0, "num_token_doc": 66.5698, "num_token_overlap": 14.5163, "num_token_query": 37.1554, "num_token_union": 65.195, "num_word_context": 202.0605, "num_word_doc": 49.6611, "num_word_query": 27.8053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2008.8667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1938, "query_norm": 1.3239, "queue_k_norm": 1.4048, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1554, "sent_len_1": 66.5698, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.6712, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.5467, "doc_norm": 1.4024, "encoder_q-embeddings": 1577.1377, "encoder_q-layer.0": 1118.2556, "encoder_q-layer.1": 1153.5778, "encoder_q-layer.10": 1280.0554, "encoder_q-layer.11": 2980.5947, "encoder_q-layer.2": 1356.7001, "encoder_q-layer.3": 1328.2863, "encoder_q-layer.4": 1331.046, "encoder_q-layer.5": 1144.6176, "encoder_q-layer.6": 1112.4464, "encoder_q-layer.7": 1263.1508, "encoder_q-layer.8": 1427.3862, "encoder_q-layer.9": 1289.1522, "epoch": 0.43, "inbatch_neg_score": 0.1937, "inbatch_pos_score": 0.8096, "learning_rate": 3.1e-05, "loss": 3.5467, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.6319, "num_token_overlap": 14.6163, "num_token_query": 37.3937, "num_token_union": 65.3083, "num_word_context": 202.1852, "num_word_doc": 49.703, "num_word_query": 27.978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2232.7018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1918, "query_norm": 1.3479, "queue_k_norm": 1.4031, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3937, "sent_len_1": 66.6319, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.9338, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.5584, "doc_norm": 1.4035, "encoder_q-embeddings": 4428.9053, "encoder_q-layer.0": 3470.6936, "encoder_q-layer.1": 4516.3745, "encoder_q-layer.10": 1163.0479, "encoder_q-layer.11": 2952.5999, "encoder_q-layer.2": 5082.752, "encoder_q-layer.3": 4460.0806, "encoder_q-layer.4": 4142.6719, "encoder_q-layer.5": 3924.0073, "encoder_q-layer.6": 3789.3787, "encoder_q-layer.7": 2714.0383, "encoder_q-layer.8": 1850.0112, "encoder_q-layer.9": 1183.3295, "epoch": 0.43, "inbatch_neg_score": 0.1872, "inbatch_pos_score": 0.812, "learning_rate": 3.094444444444445e-05, "loss": 3.5584, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.4579, "num_token_overlap": 14.4803, "num_token_query": 37.1369, "num_token_union": 65.1617, "num_word_context": 202.0786, "num_word_doc": 49.5684, "num_word_query": 27.8047, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5378.5771, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.187, "query_norm": 1.3489, "queue_k_norm": 1.4034, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1369, "sent_len_1": 66.4579, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3825, "stdk": 0.048, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.5283, "doc_norm": 1.4008, "encoder_q-embeddings": 2741.6077, "encoder_q-layer.0": 2005.9967, "encoder_q-layer.1": 2257.3074, "encoder_q-layer.10": 1039.0823, "encoder_q-layer.11": 2813.1689, "encoder_q-layer.2": 2804.7761, "encoder_q-layer.3": 2717.5051, "encoder_q-layer.4": 3249.8511, "encoder_q-layer.5": 3537.3384, "encoder_q-layer.6": 3724.0684, "encoder_q-layer.7": 3091.4827, "encoder_q-layer.8": 1279.8683, "encoder_q-layer.9": 1089.4971, "epoch": 0.43, "inbatch_neg_score": 0.1887, "inbatch_pos_score": 0.832, "learning_rate": 3.088888888888889e-05, "loss": 3.5283, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.9875, "num_token_overlap": 14.6706, "num_token_query": 37.5159, "num_token_union": 65.5435, "num_word_context": 202.4938, "num_word_doc": 50.0117, "num_word_query": 28.1233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3910.0022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.188, "query_norm": 1.3657, "queue_k_norm": 1.4062, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5159, "sent_len_1": 66.9875, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.325, "stdk": 0.0479, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5553, "doc_norm": 1.4043, "encoder_q-embeddings": 1093.6472, "encoder_q-layer.0": 704.5725, "encoder_q-layer.1": 715.1332, "encoder_q-layer.10": 1134.5697, "encoder_q-layer.11": 2953.1667, "encoder_q-layer.2": 792.3008, "encoder_q-layer.3": 806.2368, "encoder_q-layer.4": 880.6776, "encoder_q-layer.5": 885.694, "encoder_q-layer.6": 964.9919, "encoder_q-layer.7": 1046.5895, "encoder_q-layer.8": 1325.4648, "encoder_q-layer.9": 1142.0669, "epoch": 0.43, "inbatch_neg_score": 0.1906, "inbatch_pos_score": 0.8262, "learning_rate": 3.0833333333333335e-05, "loss": 3.5553, "norm_diff": 0.0491, "norm_loss": 0.0, "num_token_doc": 66.982, "num_token_overlap": 14.7085, "num_token_query": 37.4335, "num_token_union": 65.4572, "num_word_context": 202.4689, "num_word_doc": 49.9711, "num_word_query": 28.0339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1907.4214, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1907, "query_norm": 1.3553, "queue_k_norm": 1.4048, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4335, "sent_len_1": 66.982, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9462, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.5225, "doc_norm": 1.3972, "encoder_q-embeddings": 1391.9491, "encoder_q-layer.0": 975.8604, "encoder_q-layer.1": 978.9794, "encoder_q-layer.10": 1132.0751, "encoder_q-layer.11": 2821.0398, "encoder_q-layer.2": 1097.7408, "encoder_q-layer.3": 1104.0822, "encoder_q-layer.4": 1146.7274, "encoder_q-layer.5": 1075.4573, "encoder_q-layer.6": 1111.5151, "encoder_q-layer.7": 1175.7914, "encoder_q-layer.8": 1229.3206, "encoder_q-layer.9": 1133.0588, "epoch": 0.44, "inbatch_neg_score": 0.1898, "inbatch_pos_score": 0.8145, "learning_rate": 3.077777777777778e-05, "loss": 3.5225, "norm_diff": 0.0504, "norm_loss": 0.0, "num_token_doc": 66.8427, "num_token_overlap": 14.6368, "num_token_query": 37.4884, "num_token_union": 65.5313, "num_word_context": 202.3929, "num_word_doc": 49.9308, "num_word_query": 28.0691, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2012.1, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1884, "query_norm": 1.3467, "queue_k_norm": 1.4038, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4884, "sent_len_1": 66.8427, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.3787, "stdk": 0.0478, "stdq": 0.0438, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.5254, "doc_norm": 1.3976, "encoder_q-embeddings": 1295.0725, "encoder_q-layer.0": 851.4942, "encoder_q-layer.1": 865.7259, "encoder_q-layer.10": 1391.3407, "encoder_q-layer.11": 2887.0896, "encoder_q-layer.2": 955.1224, "encoder_q-layer.3": 1094.4528, "encoder_q-layer.4": 1100.2656, "encoder_q-layer.5": 1059.9691, "encoder_q-layer.6": 1078.5588, "encoder_q-layer.7": 1161.1542, "encoder_q-layer.8": 1341.5719, "encoder_q-layer.9": 1183.3976, "epoch": 0.44, "inbatch_neg_score": 0.1992, "inbatch_pos_score": 0.8027, "learning_rate": 3.0722222222222227e-05, "loss": 3.5254, "norm_diff": 0.0432, "norm_loss": 0.0, "num_token_doc": 66.9477, "num_token_overlap": 14.6617, "num_token_query": 37.5741, "num_token_union": 65.5825, "num_word_context": 202.3814, "num_word_doc": 50.0142, "num_word_query": 28.1412, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2044.131, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1985, "query_norm": 1.3544, "queue_k_norm": 1.4037, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5741, "sent_len_1": 66.9477, "sent_len_max_0": 127.99, "sent_len_max_1": 189.6075, "stdk": 0.0479, "stdq": 0.0437, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.5327, "doc_norm": 1.4043, "encoder_q-embeddings": 2037.9879, "encoder_q-layer.0": 1358.0376, "encoder_q-layer.1": 1476.7303, "encoder_q-layer.10": 1060.9528, "encoder_q-layer.11": 2713.2266, "encoder_q-layer.2": 1730.9143, "encoder_q-layer.3": 1771.0386, "encoder_q-layer.4": 1946.3484, "encoder_q-layer.5": 2090.3687, "encoder_q-layer.6": 1831.6948, "encoder_q-layer.7": 1430.588, "encoder_q-layer.8": 1443.6292, "encoder_q-layer.9": 1176.9473, "epoch": 0.44, "inbatch_neg_score": 0.2048, "inbatch_pos_score": 0.8242, "learning_rate": 3.066666666666667e-05, "loss": 3.5327, "norm_diff": 0.038, "norm_loss": 0.0, "num_token_doc": 66.7102, "num_token_overlap": 14.5773, "num_token_query": 37.3072, "num_token_union": 65.3628, "num_word_context": 202.2229, "num_word_doc": 49.8138, "num_word_query": 27.9476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2634.3421, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2037, "query_norm": 1.3663, "queue_k_norm": 1.4049, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3072, "sent_len_1": 66.7102, "sent_len_max_0": 128.0, "sent_len_max_1": 187.7, "stdk": 0.0481, "stdq": 0.0439, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.5375, "doc_norm": 1.3985, "encoder_q-embeddings": 1047.0526, "encoder_q-layer.0": 707.856, "encoder_q-layer.1": 697.7693, "encoder_q-layer.10": 1129.6077, "encoder_q-layer.11": 2755.9241, "encoder_q-layer.2": 747.8741, "encoder_q-layer.3": 759.708, "encoder_q-layer.4": 792.1299, "encoder_q-layer.5": 793.8831, "encoder_q-layer.6": 928.3687, "encoder_q-layer.7": 1059.2753, "encoder_q-layer.8": 1203.9385, "encoder_q-layer.9": 1144.3979, "epoch": 0.44, "inbatch_neg_score": 0.2104, "inbatch_pos_score": 0.8164, "learning_rate": 3.061111111111111e-05, "loss": 3.5375, "norm_diff": 0.0383, "norm_loss": 0.0, "num_token_doc": 66.7319, "num_token_overlap": 14.5194, "num_token_query": 37.1658, "num_token_union": 65.3002, "num_word_context": 202.1181, "num_word_doc": 49.7739, "num_word_query": 27.8212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1766.0205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2081, "query_norm": 1.3602, "queue_k_norm": 1.4043, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1658, "sent_len_1": 66.7319, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.0263, "stdk": 0.0479, "stdq": 0.0436, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5082, "doc_norm": 1.4091, "encoder_q-embeddings": 1155.5431, "encoder_q-layer.0": 749.6028, "encoder_q-layer.1": 794.9617, "encoder_q-layer.10": 1206.6787, "encoder_q-layer.11": 2960.843, "encoder_q-layer.2": 902.9468, "encoder_q-layer.3": 972.539, "encoder_q-layer.4": 993.4052, "encoder_q-layer.5": 1021.3375, "encoder_q-layer.6": 1109.4818, "encoder_q-layer.7": 1180.2407, "encoder_q-layer.8": 1400.8201, "encoder_q-layer.9": 1160.9058, "epoch": 0.44, "inbatch_neg_score": 0.2071, "inbatch_pos_score": 0.8413, "learning_rate": 3.055555555555556e-05, "loss": 3.5082, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.768, "num_token_overlap": 14.7355, "num_token_query": 37.6039, "num_token_union": 65.4625, "num_word_context": 202.4344, "num_word_doc": 49.8427, "num_word_query": 28.1382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1970.3762, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2079, "query_norm": 1.3696, "queue_k_norm": 1.4061, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.6039, "sent_len_1": 66.768, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.4487, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.5278, "doc_norm": 1.407, "encoder_q-embeddings": 1073.5238, "encoder_q-layer.0": 720.8668, "encoder_q-layer.1": 728.3078, "encoder_q-layer.10": 1361.1338, "encoder_q-layer.11": 3123.0679, "encoder_q-layer.2": 787.1444, "encoder_q-layer.3": 815.6643, "encoder_q-layer.4": 856.4315, "encoder_q-layer.5": 876.0427, "encoder_q-layer.6": 938.8364, "encoder_q-layer.7": 1061.9651, "encoder_q-layer.8": 1294.1023, "encoder_q-layer.9": 1109.3878, "epoch": 0.44, "inbatch_neg_score": 0.2128, "inbatch_pos_score": 0.8247, "learning_rate": 3.05e-05, "loss": 3.5278, "norm_diff": 0.0559, "norm_loss": 0.0, "num_token_doc": 66.87, "num_token_overlap": 14.6765, "num_token_query": 37.3147, "num_token_union": 65.3277, "num_word_context": 201.9003, "num_word_doc": 49.8686, "num_word_query": 27.9536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1918.996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2117, "query_norm": 1.3511, "queue_k_norm": 1.4069, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3147, "sent_len_1": 66.87, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.6463, "stdk": 0.0482, "stdq": 0.0441, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5241, "doc_norm": 1.4099, "encoder_q-embeddings": 2057.5769, "encoder_q-layer.0": 1376.322, "encoder_q-layer.1": 1417.2836, "encoder_q-layer.10": 2320.9922, "encoder_q-layer.11": 6294.3228, "encoder_q-layer.2": 1550.0629, "encoder_q-layer.3": 1589.1064, "encoder_q-layer.4": 1659.566, "encoder_q-layer.5": 1692.6345, "encoder_q-layer.6": 1888.9795, "encoder_q-layer.7": 2147.2993, "encoder_q-layer.8": 2477.1467, "encoder_q-layer.9": 2343.8496, "epoch": 0.44, "inbatch_neg_score": 0.2039, "inbatch_pos_score": 0.8105, "learning_rate": 3.044444444444445e-05, "loss": 3.5241, "norm_diff": 0.0964, "norm_loss": 0.0, "num_token_doc": 66.9067, "num_token_overlap": 14.5379, "num_token_query": 37.1581, "num_token_union": 65.4319, "num_word_context": 202.5176, "num_word_doc": 50.0017, "num_word_query": 27.8248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3836.0098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2052, "query_norm": 1.3136, "queue_k_norm": 1.4075, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1581, "sent_len_1": 66.9067, "sent_len_max_0": 128.0, "sent_len_max_1": 188.01, "stdk": 0.0483, "stdq": 0.0431, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5131, "doc_norm": 1.4075, "encoder_q-embeddings": 2235.323, "encoder_q-layer.0": 1431.8051, "encoder_q-layer.1": 1499.0896, "encoder_q-layer.10": 2269.0537, "encoder_q-layer.11": 5775.4297, "encoder_q-layer.2": 1669.7834, "encoder_q-layer.3": 1797.5333, "encoder_q-layer.4": 1897.0952, "encoder_q-layer.5": 1919.1254, "encoder_q-layer.6": 2026.1587, "encoder_q-layer.7": 2142.7695, "encoder_q-layer.8": 2491.3269, "encoder_q-layer.9": 2285.0159, "epoch": 0.44, "inbatch_neg_score": 0.2074, "inbatch_pos_score": 0.8452, "learning_rate": 3.0388888888888887e-05, "loss": 3.5131, "norm_diff": 0.0714, "norm_loss": 0.0, "num_token_doc": 66.774, "num_token_overlap": 14.6432, "num_token_query": 37.4525, "num_token_union": 65.4807, "num_word_context": 202.5719, "num_word_doc": 49.8869, "num_word_query": 28.0671, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3800.4868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.209, "query_norm": 1.3361, "queue_k_norm": 1.4087, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4525, "sent_len_1": 66.774, "sent_len_max_0": 127.995, "sent_len_max_1": 187.1113, "stdk": 0.0482, "stdq": 0.044, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.5072, "doc_norm": 1.402, "encoder_q-embeddings": 2501.5854, "encoder_q-layer.0": 1652.0807, "encoder_q-layer.1": 1664.608, "encoder_q-layer.10": 2420.9778, "encoder_q-layer.11": 6205.7637, "encoder_q-layer.2": 1849.0852, "encoder_q-layer.3": 1926.3387, "encoder_q-layer.4": 1946.1144, "encoder_q-layer.5": 2035.4415, "encoder_q-layer.6": 2268.6201, "encoder_q-layer.7": 2593.5657, "encoder_q-layer.8": 2635.3977, "encoder_q-layer.9": 2300.8538, "epoch": 0.44, "inbatch_neg_score": 0.2013, "inbatch_pos_score": 0.8262, "learning_rate": 3.0333333333333337e-05, "loss": 3.5072, "norm_diff": 0.0883, "norm_loss": 0.0, "num_token_doc": 66.8394, "num_token_overlap": 14.6435, "num_token_query": 37.3814, "num_token_union": 65.4187, "num_word_context": 202.3862, "num_word_doc": 49.8865, "num_word_query": 27.9682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3968.9273, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2004, "query_norm": 1.3137, "queue_k_norm": 1.4073, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3814, "sent_len_1": 66.8394, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.34, "stdk": 0.0479, "stdq": 0.0437, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.541, "doc_norm": 1.4078, "encoder_q-embeddings": 6535.6606, "encoder_q-layer.0": 4657.1924, "encoder_q-layer.1": 4651.0835, "encoder_q-layer.10": 2291.2512, "encoder_q-layer.11": 5568.3267, "encoder_q-layer.2": 4178.2007, "encoder_q-layer.3": 4047.9521, "encoder_q-layer.4": 3466.2644, "encoder_q-layer.5": 3449.1221, "encoder_q-layer.6": 3251.6206, "encoder_q-layer.7": 2709.0718, "encoder_q-layer.8": 2590.5723, "encoder_q-layer.9": 2128.2864, "epoch": 0.44, "inbatch_neg_score": 0.1978, "inbatch_pos_score": 0.855, "learning_rate": 3.0277777777777776e-05, "loss": 3.541, "norm_diff": 0.07, "norm_loss": 0.0, "num_token_doc": 66.8001, "num_token_overlap": 14.6417, "num_token_query": 37.3545, "num_token_union": 65.3776, "num_word_context": 202.1884, "num_word_doc": 49.8187, "num_word_query": 27.9651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6305.339, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1978, "query_norm": 1.3378, "queue_k_norm": 1.4063, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3545, "sent_len_1": 66.8001, "sent_len_max_0": 127.9712, "sent_len_max_1": 190.9025, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.5285, "doc_norm": 1.4065, "encoder_q-embeddings": 2290.4832, "encoder_q-layer.0": 1535.1948, "encoder_q-layer.1": 1566.6758, "encoder_q-layer.10": 2410.7798, "encoder_q-layer.11": 6057.4097, "encoder_q-layer.2": 1770.7751, "encoder_q-layer.3": 1859.0609, "encoder_q-layer.4": 2090.1184, "encoder_q-layer.5": 2047.3936, "encoder_q-layer.6": 2185.4487, "encoder_q-layer.7": 2278.0735, "encoder_q-layer.8": 2672.8916, "encoder_q-layer.9": 2388.6216, "epoch": 0.45, "inbatch_neg_score": 0.1919, "inbatch_pos_score": 0.8115, "learning_rate": 3.0222222222222225e-05, "loss": 3.5285, "norm_diff": 0.1107, "norm_loss": 0.0, "num_token_doc": 66.8973, "num_token_overlap": 14.6233, "num_token_query": 37.4076, "num_token_union": 65.467, "num_word_context": 202.3736, "num_word_doc": 49.9338, "num_word_query": 28.0263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3894.3151, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1923, "query_norm": 1.2957, "queue_k_norm": 1.4079, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4076, "sent_len_1": 66.8973, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.2887, "stdk": 0.0481, "stdq": 0.0432, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5242, "doc_norm": 1.4066, "encoder_q-embeddings": 2197.7419, "encoder_q-layer.0": 1376.7626, "encoder_q-layer.1": 1457.4298, "encoder_q-layer.10": 2404.7026, "encoder_q-layer.11": 5826.1577, "encoder_q-layer.2": 1575.6532, "encoder_q-layer.3": 1654.7329, "encoder_q-layer.4": 1724.8442, "encoder_q-layer.5": 1656.5249, "encoder_q-layer.6": 1879.8263, "encoder_q-layer.7": 2095.2332, "encoder_q-layer.8": 2480.1472, "encoder_q-layer.9": 2264.8494, "epoch": 0.45, "inbatch_neg_score": 0.1878, "inbatch_pos_score": 0.8198, "learning_rate": 3.016666666666667e-05, "loss": 3.5242, "norm_diff": 0.0905, "norm_loss": 0.0, "num_token_doc": 66.6041, "num_token_overlap": 14.5969, "num_token_query": 37.2811, "num_token_union": 65.2245, "num_word_context": 202.238, "num_word_doc": 49.696, "num_word_query": 27.9056, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3639.8416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1882, "query_norm": 1.3161, "queue_k_norm": 1.4063, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2811, "sent_len_1": 66.6041, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6975, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.5219, "doc_norm": 1.4043, "encoder_q-embeddings": 3146.2471, "encoder_q-layer.0": 2212.3843, "encoder_q-layer.1": 2454.5991, "encoder_q-layer.10": 2389.1733, "encoder_q-layer.11": 6119.6758, "encoder_q-layer.2": 2576.6021, "encoder_q-layer.3": 2236.7625, "encoder_q-layer.4": 2363.283, "encoder_q-layer.5": 2010.9159, "encoder_q-layer.6": 2077.7124, "encoder_q-layer.7": 2209.938, "encoder_q-layer.8": 2547.863, "encoder_q-layer.9": 2305.5369, "epoch": 0.45, "inbatch_neg_score": 0.1902, "inbatch_pos_score": 0.8008, "learning_rate": 3.0111111111111113e-05, "loss": 3.5219, "norm_diff": 0.0931, "norm_loss": 0.0, "num_token_doc": 67.0221, "num_token_overlap": 14.6114, "num_token_query": 37.2884, "num_token_union": 65.4574, "num_word_context": 202.2939, "num_word_doc": 50.0458, "num_word_query": 27.909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4286.8129, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1888, "query_norm": 1.3112, "queue_k_norm": 1.408, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2884, "sent_len_1": 67.0221, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.84, "stdk": 0.048, "stdq": 0.0436, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5223, "doc_norm": 1.4055, "encoder_q-embeddings": 2213.7603, "encoder_q-layer.0": 1552.9508, "encoder_q-layer.1": 1627.9335, "encoder_q-layer.10": 2396.9487, "encoder_q-layer.11": 6025.0508, "encoder_q-layer.2": 1904.6158, "encoder_q-layer.3": 2167.3535, "encoder_q-layer.4": 2090.3152, "encoder_q-layer.5": 1983.4836, "encoder_q-layer.6": 1990.017, "encoder_q-layer.7": 2295.6365, "encoder_q-layer.8": 2628.1064, "encoder_q-layer.9": 2472.5469, "epoch": 0.45, "inbatch_neg_score": 0.1841, "inbatch_pos_score": 0.8179, "learning_rate": 3.005555555555556e-05, "loss": 3.5223, "norm_diff": 0.0811, "norm_loss": 0.0, "num_token_doc": 66.7668, "num_token_overlap": 14.6258, "num_token_query": 37.4167, "num_token_union": 65.4368, "num_word_context": 202.3165, "num_word_doc": 49.821, "num_word_query": 28.0268, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3889.4326, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.184, "query_norm": 1.3244, "queue_k_norm": 1.4061, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4167, "sent_len_1": 66.7668, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.605, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.5293, "doc_norm": 1.4022, "encoder_q-embeddings": 10221.0527, "encoder_q-layer.0": 7419.5615, "encoder_q-layer.1": 8126.8428, "encoder_q-layer.10": 2391.5564, "encoder_q-layer.11": 6373.1558, "encoder_q-layer.2": 9017.2539, "encoder_q-layer.3": 9617.3057, "encoder_q-layer.4": 10235.5586, "encoder_q-layer.5": 10541.2852, "encoder_q-layer.6": 7113.7471, "encoder_q-layer.7": 5177.1265, "encoder_q-layer.8": 3507.3252, "encoder_q-layer.9": 2535.7905, "epoch": 0.45, "inbatch_neg_score": 0.1814, "inbatch_pos_score": 0.7646, "learning_rate": 3e-05, "loss": 3.5293, "norm_diff": 0.0956, "norm_loss": 0.0, "num_token_doc": 66.8498, "num_token_overlap": 14.53, "num_token_query": 37.2694, "num_token_union": 65.3999, "num_word_context": 202.3287, "num_word_doc": 49.8978, "num_word_query": 27.9086, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11619.4817, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1812, "query_norm": 1.3066, "queue_k_norm": 1.4066, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2694, "sent_len_1": 66.8498, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.4288, "stdk": 0.048, "stdq": 0.0434, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.528, "doc_norm": 1.4051, "encoder_q-embeddings": 2657.6013, "encoder_q-layer.0": 1775.2124, "encoder_q-layer.1": 1910.5535, "encoder_q-layer.10": 2443.7981, "encoder_q-layer.11": 5899.5669, "encoder_q-layer.2": 2133.4417, "encoder_q-layer.3": 2149.1345, "encoder_q-layer.4": 2217.2473, "encoder_q-layer.5": 2274.7671, "encoder_q-layer.6": 2349.1411, "encoder_q-layer.7": 2412.6289, "encoder_q-layer.8": 2623.5232, "encoder_q-layer.9": 2387.3872, "epoch": 0.45, "inbatch_neg_score": 0.1742, "inbatch_pos_score": 0.8003, "learning_rate": 2.9944444444444446e-05, "loss": 3.528, "norm_diff": 0.0706, "norm_loss": 0.0, "num_token_doc": 66.8977, "num_token_overlap": 14.5581, "num_token_query": 37.2956, "num_token_union": 65.4824, "num_word_context": 202.3195, "num_word_doc": 49.9338, "num_word_query": 27.9355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4144.1683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1742, "query_norm": 1.3345, "queue_k_norm": 1.4075, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2956, "sent_len_1": 66.8977, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.3487, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.5053, "doc_norm": 1.398, "encoder_q-embeddings": 3936.7944, "encoder_q-layer.0": 2647.2939, "encoder_q-layer.1": 3011.0862, "encoder_q-layer.10": 2327.2354, "encoder_q-layer.11": 5609.333, "encoder_q-layer.2": 3430.9229, "encoder_q-layer.3": 3726.1184, "encoder_q-layer.4": 3838.8804, "encoder_q-layer.5": 3672.8164, "encoder_q-layer.6": 3750.7073, "encoder_q-layer.7": 3687.0869, "encoder_q-layer.8": 2924.9148, "encoder_q-layer.9": 2272.0591, "epoch": 0.45, "inbatch_neg_score": 0.1769, "inbatch_pos_score": 0.8018, "learning_rate": 2.988888888888889e-05, "loss": 3.5053, "norm_diff": 0.065, "norm_loss": 0.0, "num_token_doc": 66.8519, "num_token_overlap": 14.575, "num_token_query": 37.2283, "num_token_union": 65.3846, "num_word_context": 202.3785, "num_word_doc": 49.9075, "num_word_query": 27.8671, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5285.1867, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1757, "query_norm": 1.3331, "queue_k_norm": 1.4056, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2283, "sent_len_1": 66.8519, "sent_len_max_0": 127.985, "sent_len_max_1": 188.9325, "stdk": 0.0479, "stdq": 0.0441, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.5096, "doc_norm": 1.4094, "encoder_q-embeddings": 9690.6641, "encoder_q-layer.0": 7575.7417, "encoder_q-layer.1": 8768.6416, "encoder_q-layer.10": 2204.7827, "encoder_q-layer.11": 5571.4092, "encoder_q-layer.2": 10470.1777, "encoder_q-layer.3": 11620.3506, "encoder_q-layer.4": 12615.3955, "encoder_q-layer.5": 13942.876, "encoder_q-layer.6": 14692.4121, "encoder_q-layer.7": 17275.0957, "encoder_q-layer.8": 12124.3984, "encoder_q-layer.9": 3390.9585, "epoch": 0.45, "inbatch_neg_score": 0.1777, "inbatch_pos_score": 0.8164, "learning_rate": 2.9833333333333335e-05, "loss": 3.5096, "norm_diff": 0.0669, "norm_loss": 0.0, "num_token_doc": 66.8562, "num_token_overlap": 14.6027, "num_token_query": 37.417, "num_token_union": 65.479, "num_word_context": 202.3134, "num_word_doc": 49.8504, "num_word_query": 28.0258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15969.2041, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1772, "query_norm": 1.3426, "queue_k_norm": 1.4041, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.417, "sent_len_1": 66.8562, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7, "stdk": 0.0484, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5195, "doc_norm": 1.4025, "encoder_q-embeddings": 3520.8506, "encoder_q-layer.0": 2565.4312, "encoder_q-layer.1": 2640.8643, "encoder_q-layer.10": 2276.6914, "encoder_q-layer.11": 5459.396, "encoder_q-layer.2": 2728.7275, "encoder_q-layer.3": 2646.4365, "encoder_q-layer.4": 2519.4697, "encoder_q-layer.5": 2372.1809, "encoder_q-layer.6": 2329.6963, "encoder_q-layer.7": 2297.1133, "encoder_q-layer.8": 2672.1738, "encoder_q-layer.9": 2378.3552, "epoch": 0.45, "inbatch_neg_score": 0.1783, "inbatch_pos_score": 0.8062, "learning_rate": 2.9777777777777777e-05, "loss": 3.5195, "norm_diff": 0.0389, "norm_loss": 0.0, "num_token_doc": 66.8679, "num_token_overlap": 14.6309, "num_token_query": 37.3528, "num_token_union": 65.4991, "num_word_context": 202.2737, "num_word_doc": 49.898, "num_word_query": 27.9666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4392.7915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1785, "query_norm": 1.3636, "queue_k_norm": 1.4044, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3528, "sent_len_1": 66.8679, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8487, "stdk": 0.0481, "stdq": 0.0449, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.5303, "doc_norm": 1.3954, "encoder_q-embeddings": 3349.571, "encoder_q-layer.0": 2285.3811, "encoder_q-layer.1": 2540.5535, "encoder_q-layer.10": 2302.4685, "encoder_q-layer.11": 5727.0708, "encoder_q-layer.2": 2967.0098, "encoder_q-layer.3": 3143.3762, "encoder_q-layer.4": 3492.1846, "encoder_q-layer.5": 3353.3757, "encoder_q-layer.6": 2904.5139, "encoder_q-layer.7": 2797.8877, "encoder_q-layer.8": 2770.9524, "encoder_q-layer.9": 2271.9563, "epoch": 0.45, "inbatch_neg_score": 0.1834, "inbatch_pos_score": 0.8091, "learning_rate": 2.9722222222222223e-05, "loss": 3.5303, "norm_diff": 0.0328, "norm_loss": 0.0, "num_token_doc": 66.5434, "num_token_overlap": 14.5479, "num_token_query": 37.2834, "num_token_union": 65.2621, "num_word_context": 201.9507, "num_word_doc": 49.6417, "num_word_query": 27.9121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4770.4664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1836, "query_norm": 1.363, "queue_k_norm": 1.4023, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2834, "sent_len_1": 66.5434, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.5238, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.5292, "doc_norm": 1.407, "encoder_q-embeddings": 1515.1256, "encoder_q-layer.0": 1065.2489, "encoder_q-layer.1": 1143.7963, "encoder_q-layer.10": 1329.9481, "encoder_q-layer.11": 2932.1953, "encoder_q-layer.2": 1324.1484, "encoder_q-layer.3": 1416.016, "encoder_q-layer.4": 1443.5841, "encoder_q-layer.5": 1438.5732, "encoder_q-layer.6": 1403.6036, "encoder_q-layer.7": 1500.0533, "encoder_q-layer.8": 1456.3594, "encoder_q-layer.9": 1164.2395, "epoch": 0.45, "inbatch_neg_score": 0.1906, "inbatch_pos_score": 0.8179, "learning_rate": 2.9666666666666672e-05, "loss": 3.5292, "norm_diff": 0.0513, "norm_loss": 0.0, "num_token_doc": 66.8238, "num_token_overlap": 14.6059, "num_token_query": 37.3255, "num_token_union": 65.3807, "num_word_context": 202.3341, "num_word_doc": 49.8852, "num_word_query": 27.9388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2300.8136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1895, "query_norm": 1.3557, "queue_k_norm": 1.4038, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3255, "sent_len_1": 66.8238, "sent_len_max_0": 128.0, "sent_len_max_1": 187.3512, "stdk": 0.0483, "stdq": 0.0442, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.5303, "doc_norm": 1.4035, "encoder_q-embeddings": 1151.0858, "encoder_q-layer.0": 764.4438, "encoder_q-layer.1": 818.7394, "encoder_q-layer.10": 1060.9141, "encoder_q-layer.11": 2648.085, "encoder_q-layer.2": 930.8685, "encoder_q-layer.3": 957.8647, "encoder_q-layer.4": 1013.3605, "encoder_q-layer.5": 1051.4891, "encoder_q-layer.6": 1096.2081, "encoder_q-layer.7": 1128.0879, "encoder_q-layer.8": 1159.6006, "encoder_q-layer.9": 1058.0991, "epoch": 0.46, "inbatch_neg_score": 0.1899, "inbatch_pos_score": 0.8159, "learning_rate": 2.961111111111111e-05, "loss": 3.5303, "norm_diff": 0.0596, "norm_loss": 0.0, "num_token_doc": 66.8878, "num_token_overlap": 14.6255, "num_token_query": 37.5202, "num_token_union": 65.5299, "num_word_context": 202.454, "num_word_doc": 49.9172, "num_word_query": 28.0915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1875.2707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1902, "query_norm": 1.3439, "queue_k_norm": 1.403, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5202, "sent_len_1": 66.8878, "sent_len_max_0": 127.99, "sent_len_max_1": 189.4963, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5048, "doc_norm": 1.3992, "encoder_q-embeddings": 3678.9392, "encoder_q-layer.0": 3111.0811, "encoder_q-layer.1": 3402.6584, "encoder_q-layer.10": 1367.8889, "encoder_q-layer.11": 2903.8523, "encoder_q-layer.2": 3560.3718, "encoder_q-layer.3": 2402.1704, "encoder_q-layer.4": 1932.4607, "encoder_q-layer.5": 1691.0354, "encoder_q-layer.6": 1651.0553, "encoder_q-layer.7": 1492.9169, "encoder_q-layer.8": 1628.1377, "encoder_q-layer.9": 1235.7899, "epoch": 0.46, "inbatch_neg_score": 0.1887, "inbatch_pos_score": 0.8066, "learning_rate": 2.955555555555556e-05, "loss": 3.5048, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.8468, "num_token_overlap": 14.6095, "num_token_query": 37.3704, "num_token_union": 65.4509, "num_word_context": 202.4415, "num_word_doc": 49.8754, "num_word_query": 27.9441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3856.4646, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1882, "query_norm": 1.3366, "queue_k_norm": 1.4035, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3704, "sent_len_1": 66.8468, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7175, "stdk": 0.048, "stdq": 0.0434, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5326, "doc_norm": 1.3988, "encoder_q-embeddings": 1392.5874, "encoder_q-layer.0": 922.7446, "encoder_q-layer.1": 1025.3124, "encoder_q-layer.10": 1234.8101, "encoder_q-layer.11": 2949.9482, "encoder_q-layer.2": 1162.3696, "encoder_q-layer.3": 1209.8396, "encoder_q-layer.4": 1412.3269, "encoder_q-layer.5": 1310.5933, "encoder_q-layer.6": 1333.5029, "encoder_q-layer.7": 1539.761, "encoder_q-layer.8": 1517.4388, "encoder_q-layer.9": 1213.2598, "epoch": 0.46, "inbatch_neg_score": 0.1986, "inbatch_pos_score": 0.7969, "learning_rate": 2.95e-05, "loss": 3.5326, "norm_diff": 0.069, "norm_loss": 0.0, "num_token_doc": 66.83, "num_token_overlap": 14.5318, "num_token_query": 37.3643, "num_token_union": 65.4465, "num_word_context": 202.157, "num_word_doc": 49.8145, "num_word_query": 27.9826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2207.5669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1971, "query_norm": 1.3298, "queue_k_norm": 1.4037, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3643, "sent_len_1": 66.83, "sent_len_max_0": 128.0, "sent_len_max_1": 192.6488, "stdk": 0.0479, "stdq": 0.0433, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.5206, "doc_norm": 1.4071, "encoder_q-embeddings": 961.3353, "encoder_q-layer.0": 649.3047, "encoder_q-layer.1": 657.9501, "encoder_q-layer.10": 1071.4685, "encoder_q-layer.11": 2765.2603, "encoder_q-layer.2": 728.4841, "encoder_q-layer.3": 743.5482, "encoder_q-layer.4": 770.89, "encoder_q-layer.5": 791.2997, "encoder_q-layer.6": 856.0339, "encoder_q-layer.7": 952.5289, "encoder_q-layer.8": 1165.6897, "encoder_q-layer.9": 1079.451, "epoch": 0.46, "inbatch_neg_score": 0.204, "inbatch_pos_score": 0.8535, "learning_rate": 2.9444444444444448e-05, "loss": 3.5206, "norm_diff": 0.0545, "norm_loss": 0.0, "num_token_doc": 66.9918, "num_token_overlap": 14.6423, "num_token_query": 37.4435, "num_token_union": 65.5128, "num_word_context": 202.4739, "num_word_doc": 49.9684, "num_word_query": 28.018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1733.7465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2026, "query_norm": 1.3526, "queue_k_norm": 1.4039, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4435, "sent_len_1": 66.9918, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.385, "stdk": 0.0482, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5105, "doc_norm": 1.4124, "encoder_q-embeddings": 1133.5525, "encoder_q-layer.0": 749.0834, "encoder_q-layer.1": 813.3344, "encoder_q-layer.10": 1206.0867, "encoder_q-layer.11": 2826.8799, "encoder_q-layer.2": 930.3179, "encoder_q-layer.3": 926.1533, "encoder_q-layer.4": 1006.8683, "encoder_q-layer.5": 967.3743, "encoder_q-layer.6": 1094.3568, "encoder_q-layer.7": 1142.2656, "encoder_q-layer.8": 1287.0558, "encoder_q-layer.9": 1123.4088, "epoch": 0.46, "inbatch_neg_score": 0.2006, "inbatch_pos_score": 0.8281, "learning_rate": 2.9388888888888887e-05, "loss": 3.5105, "norm_diff": 0.0727, "norm_loss": 0.0, "num_token_doc": 66.6444, "num_token_overlap": 14.603, "num_token_query": 37.3972, "num_token_union": 65.3856, "num_word_context": 202.2125, "num_word_doc": 49.7087, "num_word_query": 27.999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1904.9283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2002, "query_norm": 1.3397, "queue_k_norm": 1.4037, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3972, "sent_len_1": 66.6444, "sent_len_max_0": 127.9775, "sent_len_max_1": 188.3413, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.5165, "doc_norm": 1.4088, "encoder_q-embeddings": 1424.5262, "encoder_q-layer.0": 939.5247, "encoder_q-layer.1": 1027.1525, "encoder_q-layer.10": 1200.325, "encoder_q-layer.11": 2703.2612, "encoder_q-layer.2": 1268.3132, "encoder_q-layer.3": 1289.6188, "encoder_q-layer.4": 1273.9414, "encoder_q-layer.5": 1205.7015, "encoder_q-layer.6": 1282.5338, "encoder_q-layer.7": 1293.0868, "encoder_q-layer.8": 1426.9027, "encoder_q-layer.9": 1161.4767, "epoch": 0.46, "inbatch_neg_score": 0.1986, "inbatch_pos_score": 0.8403, "learning_rate": 2.9333333333333336e-05, "loss": 3.5165, "norm_diff": 0.0771, "norm_loss": 0.0, "num_token_doc": 66.9039, "num_token_overlap": 14.6438, "num_token_query": 37.3353, "num_token_union": 65.4675, "num_word_context": 202.573, "num_word_doc": 49.926, "num_word_query": 27.9435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2105.7558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.198, "query_norm": 1.3317, "queue_k_norm": 1.4033, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3353, "sent_len_1": 66.9039, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.3212, "stdk": 0.0483, "stdq": 0.0443, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.5291, "doc_norm": 1.4031, "encoder_q-embeddings": 3713.4934, "encoder_q-layer.0": 2619.9597, "encoder_q-layer.1": 3071.0964, "encoder_q-layer.10": 1200.4272, "encoder_q-layer.11": 2987.6055, "encoder_q-layer.2": 3765.9321, "encoder_q-layer.3": 3993.8086, "encoder_q-layer.4": 3876.4893, "encoder_q-layer.5": 3470.7671, "encoder_q-layer.6": 3262.9028, "encoder_q-layer.7": 2416.4487, "encoder_q-layer.8": 1689.0292, "encoder_q-layer.9": 1213.27, "epoch": 0.46, "inbatch_neg_score": 0.1999, "inbatch_pos_score": 0.833, "learning_rate": 2.927777777777778e-05, "loss": 3.5291, "norm_diff": 0.0719, "norm_loss": 0.0, "num_token_doc": 66.696, "num_token_overlap": 14.5387, "num_token_query": 37.1823, "num_token_union": 65.2812, "num_word_context": 202.1539, "num_word_doc": 49.8012, "num_word_query": 27.8357, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4516.9038, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1992, "query_norm": 1.3312, "queue_k_norm": 1.4029, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1823, "sent_len_1": 66.696, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.7275, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.5125, "doc_norm": 1.4029, "encoder_q-embeddings": 1056.5814, "encoder_q-layer.0": 735.9429, "encoder_q-layer.1": 838.1533, "encoder_q-layer.10": 571.5137, "encoder_q-layer.11": 1479.9059, "encoder_q-layer.2": 946.7767, "encoder_q-layer.3": 826.6478, "encoder_q-layer.4": 851.632, "encoder_q-layer.5": 833.9569, "encoder_q-layer.6": 917.2313, "encoder_q-layer.7": 864.4535, "encoder_q-layer.8": 794.2228, "encoder_q-layer.9": 597.813, "epoch": 0.46, "inbatch_neg_score": 0.1981, "inbatch_pos_score": 0.8257, "learning_rate": 2.9222222222222224e-05, "loss": 3.5125, "norm_diff": 0.0874, "norm_loss": 0.0, "num_token_doc": 66.8867, "num_token_overlap": 14.6194, "num_token_query": 37.3256, "num_token_union": 65.4367, "num_word_context": 202.7911, "num_word_doc": 49.9737, "num_word_query": 27.9822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1352.4948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.199, "query_norm": 1.3155, "queue_k_norm": 1.4067, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3256, "sent_len_1": 66.8867, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0863, "stdk": 0.0481, "stdq": 0.0439, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.5145, "doc_norm": 1.4091, "encoder_q-embeddings": 652.3926, "encoder_q-layer.0": 418.5078, "encoder_q-layer.1": 461.0539, "encoder_q-layer.10": 548.6246, "encoder_q-layer.11": 1414.8051, "encoder_q-layer.2": 515.5566, "encoder_q-layer.3": 536.1084, "encoder_q-layer.4": 582.4939, "encoder_q-layer.5": 572.9572, "encoder_q-layer.6": 584.8514, "encoder_q-layer.7": 578.6665, "encoder_q-layer.8": 630.634, "encoder_q-layer.9": 560.1904, "epoch": 0.46, "inbatch_neg_score": 0.1905, "inbatch_pos_score": 0.8354, "learning_rate": 2.916666666666667e-05, "loss": 3.5145, "norm_diff": 0.0922, "norm_loss": 0.0, "num_token_doc": 66.8577, "num_token_overlap": 14.7002, "num_token_query": 37.5818, "num_token_union": 65.5105, "num_word_context": 202.5938, "num_word_doc": 49.9292, "num_word_query": 28.1419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 999.6256, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1921, "query_norm": 1.3169, "queue_k_norm": 1.4069, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5818, "sent_len_1": 66.8577, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9988, "stdk": 0.0483, "stdq": 0.0441, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.531, "doc_norm": 1.4039, "encoder_q-embeddings": 1131.7637, "encoder_q-layer.0": 910.5424, "encoder_q-layer.1": 954.894, "encoder_q-layer.10": 606.2745, "encoder_q-layer.11": 1473.552, "encoder_q-layer.2": 1311.6077, "encoder_q-layer.3": 1490.7959, "encoder_q-layer.4": 1432.5515, "encoder_q-layer.5": 1173.49, "encoder_q-layer.6": 974.8627, "encoder_q-layer.7": 853.1751, "encoder_q-layer.8": 810.4084, "encoder_q-layer.9": 628.9076, "epoch": 0.46, "inbatch_neg_score": 0.1908, "inbatch_pos_score": 0.8242, "learning_rate": 2.9111111111111112e-05, "loss": 3.531, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.6063, "num_token_overlap": 14.5265, "num_token_query": 37.2486, "num_token_union": 65.3143, "num_word_context": 202.3189, "num_word_doc": 49.7047, "num_word_query": 27.8813, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1624.4164, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1912, "query_norm": 1.3326, "queue_k_norm": 1.4056, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2486, "sent_len_1": 66.6063, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.7388, "stdk": 0.0481, "stdq": 0.0447, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.5035, "doc_norm": 1.4117, "encoder_q-embeddings": 1269.1843, "encoder_q-layer.0": 869.2252, "encoder_q-layer.1": 970.5636, "encoder_q-layer.10": 615.6006, "encoder_q-layer.11": 1573.4236, "encoder_q-layer.2": 1142.8672, "encoder_q-layer.3": 1289.8013, "encoder_q-layer.4": 1469.051, "encoder_q-layer.5": 1371.9636, "encoder_q-layer.6": 1210.53, "encoder_q-layer.7": 1064.5398, "encoder_q-layer.8": 898.1624, "encoder_q-layer.9": 619.7221, "epoch": 0.47, "inbatch_neg_score": 0.1897, "inbatch_pos_score": 0.8291, "learning_rate": 2.9055555555555558e-05, "loss": 3.5035, "norm_diff": 0.094, "norm_loss": 0.0, "num_token_doc": 66.6105, "num_token_overlap": 14.6267, "num_token_query": 37.317, "num_token_union": 65.2338, "num_word_context": 201.9346, "num_word_doc": 49.7052, "num_word_query": 27.9221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1713.3408, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1895, "query_norm": 1.3177, "queue_k_norm": 1.4074, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.317, "sent_len_1": 66.6105, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1887, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.5162, "doc_norm": 1.4033, "encoder_q-embeddings": 614.5594, "encoder_q-layer.0": 401.3016, "encoder_q-layer.1": 429.7337, "encoder_q-layer.10": 634.9761, "encoder_q-layer.11": 1530.6719, "encoder_q-layer.2": 483.7342, "encoder_q-layer.3": 489.9838, "encoder_q-layer.4": 516.1371, "encoder_q-layer.5": 513.2051, "encoder_q-layer.6": 492.4228, "encoder_q-layer.7": 541.2891, "encoder_q-layer.8": 652.5706, "encoder_q-layer.9": 583.3059, "epoch": 0.47, "inbatch_neg_score": 0.1886, "inbatch_pos_score": 0.7891, "learning_rate": 2.9e-05, "loss": 3.5162, "norm_diff": 0.1015, "norm_loss": 0.0, "num_token_doc": 66.7381, "num_token_overlap": 14.6333, "num_token_query": 37.5549, "num_token_union": 65.4406, "num_word_context": 202.3279, "num_word_doc": 49.7837, "num_word_query": 28.1264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 990.7727, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1882, "query_norm": 1.3018, "queue_k_norm": 1.4061, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5549, "sent_len_1": 66.7381, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8375, "stdk": 0.0481, "stdq": 0.0435, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5174, "doc_norm": 1.4048, "encoder_q-embeddings": 572.1226, "encoder_q-layer.0": 383.2956, "encoder_q-layer.1": 400.2879, "encoder_q-layer.10": 559.6143, "encoder_q-layer.11": 1443.0728, "encoder_q-layer.2": 451.3859, "encoder_q-layer.3": 465.6149, "encoder_q-layer.4": 486.6238, "encoder_q-layer.5": 499.0312, "encoder_q-layer.6": 526.4158, "encoder_q-layer.7": 586.1379, "encoder_q-layer.8": 615.656, "encoder_q-layer.9": 535.8164, "epoch": 0.47, "inbatch_neg_score": 0.1854, "inbatch_pos_score": 0.8032, "learning_rate": 2.8944444444444446e-05, "loss": 3.5174, "norm_diff": 0.1205, "norm_loss": 0.0, "num_token_doc": 66.9266, "num_token_overlap": 14.5846, "num_token_query": 37.3578, "num_token_union": 65.5416, "num_word_context": 202.5035, "num_word_doc": 49.9637, "num_word_query": 27.9639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 946.2359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1848, "query_norm": 1.2843, "queue_k_norm": 1.4077, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3578, "sent_len_1": 66.9266, "sent_len_max_0": 127.985, "sent_len_max_1": 188.505, "stdk": 0.0481, "stdq": 0.0429, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4932, "doc_norm": 1.4049, "encoder_q-embeddings": 642.1027, "encoder_q-layer.0": 439.8487, "encoder_q-layer.1": 452.8126, "encoder_q-layer.10": 557.45, "encoder_q-layer.11": 1433.6963, "encoder_q-layer.2": 500.8073, "encoder_q-layer.3": 518.5799, "encoder_q-layer.4": 548.6406, "encoder_q-layer.5": 548.7627, "encoder_q-layer.6": 547.7966, "encoder_q-layer.7": 515.8646, "encoder_q-layer.8": 623.1125, "encoder_q-layer.9": 561.8312, "epoch": 0.47, "inbatch_neg_score": 0.182, "inbatch_pos_score": 0.8335, "learning_rate": 2.8888888888888888e-05, "loss": 3.4932, "norm_diff": 0.0756, "norm_loss": 0.0, "num_token_doc": 66.7861, "num_token_overlap": 14.7109, "num_token_query": 37.6843, "num_token_union": 65.4715, "num_word_context": 202.1987, "num_word_doc": 49.8436, "num_word_query": 28.2347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1006.3687, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1824, "query_norm": 1.3293, "queue_k_norm": 1.4046, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.6843, "sent_len_1": 66.7861, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.305, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.517, "doc_norm": 1.4067, "encoder_q-embeddings": 487.0671, "encoder_q-layer.0": 330.2661, "encoder_q-layer.1": 344.6312, "encoder_q-layer.10": 591.0149, "encoder_q-layer.11": 1471.8251, "encoder_q-layer.2": 378.8732, "encoder_q-layer.3": 386.8369, "encoder_q-layer.4": 392.9952, "encoder_q-layer.5": 385.9548, "encoder_q-layer.6": 442.2757, "encoder_q-layer.7": 509.5533, "encoder_q-layer.8": 593.5582, "encoder_q-layer.9": 541.3041, "epoch": 0.47, "inbatch_neg_score": 0.1784, "inbatch_pos_score": 0.8042, "learning_rate": 2.8833333333333334e-05, "loss": 3.517, "norm_diff": 0.111, "norm_loss": 0.0, "num_token_doc": 66.5736, "num_token_overlap": 14.5316, "num_token_query": 37.2496, "num_token_union": 65.2652, "num_word_context": 202.1005, "num_word_doc": 49.6426, "num_word_query": 27.8987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 895.5601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1775, "query_norm": 1.2957, "queue_k_norm": 1.4044, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2496, "sent_len_1": 66.5736, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.7887, "stdk": 0.0482, "stdq": 0.0434, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.5027, "doc_norm": 1.4058, "encoder_q-embeddings": 521.3029, "encoder_q-layer.0": 338.9977, "encoder_q-layer.1": 349.2332, "encoder_q-layer.10": 613.3196, "encoder_q-layer.11": 1411.8483, "encoder_q-layer.2": 391.2078, "encoder_q-layer.3": 395.8361, "encoder_q-layer.4": 407.071, "encoder_q-layer.5": 418.742, "encoder_q-layer.6": 454.013, "encoder_q-layer.7": 473.0954, "encoder_q-layer.8": 577.8377, "encoder_q-layer.9": 551.6736, "epoch": 0.47, "inbatch_neg_score": 0.1686, "inbatch_pos_score": 0.7988, "learning_rate": 2.877777777777778e-05, "loss": 3.5027, "norm_diff": 0.0951, "norm_loss": 0.0, "num_token_doc": 66.5949, "num_token_overlap": 14.5485, "num_token_query": 37.353, "num_token_union": 65.2944, "num_word_context": 202.4268, "num_word_doc": 49.7219, "num_word_query": 27.9701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 909.9861, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1689, "query_norm": 1.3107, "queue_k_norm": 1.4042, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.353, "sent_len_1": 66.5949, "sent_len_max_0": 127.9725, "sent_len_max_1": 188.1225, "stdk": 0.0483, "stdq": 0.0442, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.4998, "doc_norm": 1.3993, "encoder_q-embeddings": 818.8442, "encoder_q-layer.0": 577.6819, "encoder_q-layer.1": 642.3834, "encoder_q-layer.10": 599.1064, "encoder_q-layer.11": 1358.7002, "encoder_q-layer.2": 807.336, "encoder_q-layer.3": 802.559, "encoder_q-layer.4": 846.7601, "encoder_q-layer.5": 866.4884, "encoder_q-layer.6": 630.8627, "encoder_q-layer.7": 668.3223, "encoder_q-layer.8": 697.3548, "encoder_q-layer.9": 554.9044, "epoch": 0.47, "inbatch_neg_score": 0.169, "inbatch_pos_score": 0.7988, "learning_rate": 2.8722222222222222e-05, "loss": 3.4998, "norm_diff": 0.0956, "norm_loss": 0.0, "num_token_doc": 66.6532, "num_token_overlap": 14.6079, "num_token_query": 37.3731, "num_token_union": 65.3468, "num_word_context": 202.1601, "num_word_doc": 49.6912, "num_word_query": 27.9712, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1188.136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1677, "query_norm": 1.3037, "queue_k_norm": 1.4036, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3731, "sent_len_1": 66.6532, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2962, "stdk": 0.048, "stdq": 0.0438, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.5222, "doc_norm": 1.4045, "encoder_q-embeddings": 574.4028, "encoder_q-layer.0": 382.8913, "encoder_q-layer.1": 408.502, "encoder_q-layer.10": 526.0402, "encoder_q-layer.11": 1416.0972, "encoder_q-layer.2": 437.2966, "encoder_q-layer.3": 431.0534, "encoder_q-layer.4": 451.4705, "encoder_q-layer.5": 438.8194, "encoder_q-layer.6": 489.4444, "encoder_q-layer.7": 539.6294, "encoder_q-layer.8": 634.0321, "encoder_q-layer.9": 551.8002, "epoch": 0.47, "inbatch_neg_score": 0.1692, "inbatch_pos_score": 0.8081, "learning_rate": 2.8666666666666668e-05, "loss": 3.5222, "norm_diff": 0.0984, "norm_loss": 0.0, "num_token_doc": 66.7285, "num_token_overlap": 14.5093, "num_token_query": 37.1304, "num_token_union": 65.2845, "num_word_context": 202.5044, "num_word_doc": 49.7973, "num_word_query": 27.7967, "postclip_grad_norm": 1.0, "preclip_grad_norm": 956.4491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1692, "query_norm": 1.3061, "queue_k_norm": 1.4034, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1304, "sent_len_1": 66.7285, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1637, "stdk": 0.0483, "stdq": 0.044, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.5038, "doc_norm": 1.3988, "encoder_q-embeddings": 524.9772, "encoder_q-layer.0": 355.2953, "encoder_q-layer.1": 360.1192, "encoder_q-layer.10": 571.6943, "encoder_q-layer.11": 1382.5742, "encoder_q-layer.2": 391.0661, "encoder_q-layer.3": 389.7892, "encoder_q-layer.4": 407.7448, "encoder_q-layer.5": 417.8929, "encoder_q-layer.6": 455.5387, "encoder_q-layer.7": 546.5485, "encoder_q-layer.8": 613.8848, "encoder_q-layer.9": 570.2042, "epoch": 0.47, "inbatch_neg_score": 0.1725, "inbatch_pos_score": 0.7959, "learning_rate": 2.861111111111111e-05, "loss": 3.5038, "norm_diff": 0.0805, "norm_loss": 0.0, "num_token_doc": 66.7411, "num_token_overlap": 14.5672, "num_token_query": 37.4243, "num_token_union": 65.4473, "num_word_context": 202.314, "num_word_doc": 49.7846, "num_word_query": 28.0474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 904.4809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1711, "query_norm": 1.3183, "queue_k_norm": 1.4015, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4243, "sent_len_1": 66.7411, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1238, "stdk": 0.048, "stdq": 0.0443, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.4791, "doc_norm": 1.4103, "encoder_q-embeddings": 586.755, "encoder_q-layer.0": 381.6543, "encoder_q-layer.1": 383.988, "encoder_q-layer.10": 566.2032, "encoder_q-layer.11": 1442.6561, "encoder_q-layer.2": 432.9988, "encoder_q-layer.3": 456.9493, "encoder_q-layer.4": 517.0295, "encoder_q-layer.5": 543.6503, "encoder_q-layer.6": 580.3741, "encoder_q-layer.7": 632.4322, "encoder_q-layer.8": 699.6921, "encoder_q-layer.9": 588.7496, "epoch": 0.47, "inbatch_neg_score": 0.1712, "inbatch_pos_score": 0.7891, "learning_rate": 2.855555555555556e-05, "loss": 3.4791, "norm_diff": 0.1057, "norm_loss": 0.0, "num_token_doc": 66.9033, "num_token_overlap": 14.6154, "num_token_query": 37.3632, "num_token_union": 65.464, "num_word_context": 202.2584, "num_word_doc": 49.904, "num_word_query": 27.9856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1010.9947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1714, "query_norm": 1.3046, "queue_k_norm": 1.4003, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3632, "sent_len_1": 66.9033, "sent_len_max_0": 127.995, "sent_len_max_1": 190.7225, "stdk": 0.0485, "stdq": 0.0434, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.518, "doc_norm": 1.3966, "encoder_q-embeddings": 570.6162, "encoder_q-layer.0": 383.4421, "encoder_q-layer.1": 427.6181, "encoder_q-layer.10": 546.0416, "encoder_q-layer.11": 1476.8687, "encoder_q-layer.2": 513.1459, "encoder_q-layer.3": 506.6414, "encoder_q-layer.4": 507.2305, "encoder_q-layer.5": 522.426, "encoder_q-layer.6": 568.1672, "encoder_q-layer.7": 580.2317, "encoder_q-layer.8": 655.4792, "encoder_q-layer.9": 568.1771, "epoch": 0.48, "inbatch_neg_score": 0.1692, "inbatch_pos_score": 0.8198, "learning_rate": 2.8499999999999998e-05, "loss": 3.518, "norm_diff": 0.0884, "norm_loss": 0.0, "num_token_doc": 66.6969, "num_token_overlap": 14.5586, "num_token_query": 37.2543, "num_token_union": 65.2955, "num_word_context": 201.6436, "num_word_doc": 49.7861, "num_word_query": 27.9032, "postclip_grad_norm": 1.0, "preclip_grad_norm": 993.1224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.168, "query_norm": 1.3082, "queue_k_norm": 1.4013, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2543, "sent_len_1": 66.6969, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2425, "stdk": 0.048, "stdq": 0.0435, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.5017, "doc_norm": 1.4007, "encoder_q-embeddings": 1038.4, "encoder_q-layer.0": 749.3175, "encoder_q-layer.1": 791.3775, "encoder_q-layer.10": 585.847, "encoder_q-layer.11": 1449.9481, "encoder_q-layer.2": 884.6301, "encoder_q-layer.3": 1001.9408, "encoder_q-layer.4": 977.9835, "encoder_q-layer.5": 1011.0182, "encoder_q-layer.6": 993.1674, "encoder_q-layer.7": 853.9538, "encoder_q-layer.8": 712.4963, "encoder_q-layer.9": 587.602, "epoch": 0.48, "inbatch_neg_score": 0.1763, "inbatch_pos_score": 0.8218, "learning_rate": 2.8444444444444447e-05, "loss": 3.5017, "norm_diff": 0.0756, "norm_loss": 0.0, "num_token_doc": 66.8413, "num_token_overlap": 14.5936, "num_token_query": 37.3672, "num_token_union": 65.4052, "num_word_context": 202.3235, "num_word_doc": 49.8537, "num_word_query": 27.9608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1390.3463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1759, "query_norm": 1.3251, "queue_k_norm": 1.4027, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3672, "sent_len_1": 66.8413, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4675, "stdk": 0.0482, "stdq": 0.044, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.5103, "doc_norm": 1.4018, "encoder_q-embeddings": 605.7223, "encoder_q-layer.0": 397.1777, "encoder_q-layer.1": 423.8122, "encoder_q-layer.10": 532.4493, "encoder_q-layer.11": 1428.7937, "encoder_q-layer.2": 492.1364, "encoder_q-layer.3": 508.9774, "encoder_q-layer.4": 537.3727, "encoder_q-layer.5": 548.033, "encoder_q-layer.6": 607.9173, "encoder_q-layer.7": 629.0731, "encoder_q-layer.8": 650.8972, "encoder_q-layer.9": 544.2053, "epoch": 0.48, "inbatch_neg_score": 0.1775, "inbatch_pos_score": 0.7998, "learning_rate": 2.8388888888888893e-05, "loss": 3.5103, "norm_diff": 0.0825, "norm_loss": 0.0, "num_token_doc": 66.6893, "num_token_overlap": 14.5344, "num_token_query": 37.1743, "num_token_union": 65.2491, "num_word_context": 202.3404, "num_word_doc": 49.7079, "num_word_query": 27.8419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 995.2149, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1774, "query_norm": 1.3193, "queue_k_norm": 1.4015, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1743, "sent_len_1": 66.6893, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.37, "stdk": 0.0482, "stdq": 0.0438, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4935, "doc_norm": 1.404, "encoder_q-embeddings": 1200.6161, "encoder_q-layer.0": 822.8638, "encoder_q-layer.1": 964.2441, "encoder_q-layer.10": 593.1063, "encoder_q-layer.11": 1469.2743, "encoder_q-layer.2": 1103.3984, "encoder_q-layer.3": 1168.4519, "encoder_q-layer.4": 1267.8397, "encoder_q-layer.5": 1113.2858, "encoder_q-layer.6": 1170.2903, "encoder_q-layer.7": 1070.3284, "encoder_q-layer.8": 898.4681, "encoder_q-layer.9": 662.0114, "epoch": 0.48, "inbatch_neg_score": 0.1815, "inbatch_pos_score": 0.7954, "learning_rate": 2.8333333333333335e-05, "loss": 3.4935, "norm_diff": 0.0869, "norm_loss": 0.0, "num_token_doc": 67.1445, "num_token_overlap": 14.6762, "num_token_query": 37.5796, "num_token_union": 65.6658, "num_word_context": 202.8732, "num_word_doc": 50.0569, "num_word_query": 28.1522, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1616.9497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1815, "query_norm": 1.3171, "queue_k_norm": 1.402, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5796, "sent_len_1": 67.1445, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1025, "stdk": 0.0483, "stdq": 0.0433, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5135, "doc_norm": 1.4033, "encoder_q-embeddings": 590.3066, "encoder_q-layer.0": 388.2684, "encoder_q-layer.1": 387.7005, "encoder_q-layer.10": 565.0146, "encoder_q-layer.11": 1421.0732, "encoder_q-layer.2": 412.2831, "encoder_q-layer.3": 412.2249, "encoder_q-layer.4": 433.5714, "encoder_q-layer.5": 408.7741, "encoder_q-layer.6": 457.6465, "encoder_q-layer.7": 498.5986, "encoder_q-layer.8": 615.9677, "encoder_q-layer.9": 555.6099, "epoch": 0.48, "inbatch_neg_score": 0.1829, "inbatch_pos_score": 0.8247, "learning_rate": 2.827777777777778e-05, "loss": 3.5135, "norm_diff": 0.055, "norm_loss": 0.0, "num_token_doc": 66.7213, "num_token_overlap": 14.5494, "num_token_query": 37.2142, "num_token_union": 65.3126, "num_word_context": 202.3027, "num_word_doc": 49.7848, "num_word_query": 27.8417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 939.2567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1832, "query_norm": 1.3483, "queue_k_norm": 1.4014, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2142, "sent_len_1": 66.7213, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.1287, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.488, "doc_norm": 1.4009, "encoder_q-embeddings": 527.5965, "encoder_q-layer.0": 345.704, "encoder_q-layer.1": 364.5543, "encoder_q-layer.10": 564.5417, "encoder_q-layer.11": 1486.3907, "encoder_q-layer.2": 404.0021, "encoder_q-layer.3": 410.762, "encoder_q-layer.4": 451.2896, "encoder_q-layer.5": 436.8969, "encoder_q-layer.6": 520.6073, "encoder_q-layer.7": 553.4225, "encoder_q-layer.8": 641.7272, "encoder_q-layer.9": 555.5479, "epoch": 0.48, "inbatch_neg_score": 0.1884, "inbatch_pos_score": 0.8013, "learning_rate": 2.8222222222222223e-05, "loss": 3.488, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.9438, "num_token_overlap": 14.5907, "num_token_query": 37.2853, "num_token_union": 65.3788, "num_word_context": 202.2324, "num_word_doc": 49.9248, "num_word_query": 27.9375, "postclip_grad_norm": 1.0, "preclip_grad_norm": 956.3725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1862, "query_norm": 1.3434, "queue_k_norm": 1.4056, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2853, "sent_len_1": 66.9438, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.9025, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.482, "doc_norm": 1.4012, "encoder_q-embeddings": 692.2476, "encoder_q-layer.0": 451.3062, "encoder_q-layer.1": 505.4796, "encoder_q-layer.10": 579.5895, "encoder_q-layer.11": 1451.9802, "encoder_q-layer.2": 585.436, "encoder_q-layer.3": 639.4454, "encoder_q-layer.4": 712.5288, "encoder_q-layer.5": 711.3281, "encoder_q-layer.6": 744.894, "encoder_q-layer.7": 783.5433, "encoder_q-layer.8": 754.5755, "encoder_q-layer.9": 582.6331, "epoch": 0.48, "inbatch_neg_score": 0.1951, "inbatch_pos_score": 0.833, "learning_rate": 2.816666666666667e-05, "loss": 3.482, "norm_diff": 0.045, "norm_loss": 0.0, "num_token_doc": 66.9219, "num_token_overlap": 14.6784, "num_token_query": 37.405, "num_token_union": 65.4572, "num_word_context": 202.6773, "num_word_doc": 49.9808, "num_word_query": 28.021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1134.5701, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1951, "query_norm": 1.3562, "queue_k_norm": 1.4026, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.405, "sent_len_1": 66.9219, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.4963, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.4979, "doc_norm": 1.4091, "encoder_q-embeddings": 1353.819, "encoder_q-layer.0": 939.6494, "encoder_q-layer.1": 971.869, "encoder_q-layer.10": 1340.1202, "encoder_q-layer.11": 2967.6599, "encoder_q-layer.2": 1047.1968, "encoder_q-layer.3": 1067.3973, "encoder_q-layer.4": 1090.2642, "encoder_q-layer.5": 1064.9768, "encoder_q-layer.6": 1094.7643, "encoder_q-layer.7": 1164.8013, "encoder_q-layer.8": 1368.141, "encoder_q-layer.9": 1235.7871, "epoch": 0.48, "inbatch_neg_score": 0.2024, "inbatch_pos_score": 0.8447, "learning_rate": 2.811111111111111e-05, "loss": 3.4979, "norm_diff": 0.0596, "norm_loss": 0.0, "num_token_doc": 66.8178, "num_token_overlap": 14.5844, "num_token_query": 37.3981, "num_token_union": 65.49, "num_word_context": 202.2273, "num_word_doc": 49.8425, "num_word_query": 27.9942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2089.751, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.202, "query_norm": 1.3495, "queue_k_norm": 1.4028, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3981, "sent_len_1": 66.8178, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.315, "stdk": 0.0484, "stdq": 0.0441, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4928, "doc_norm": 1.4046, "encoder_q-embeddings": 1282.9208, "encoder_q-layer.0": 836.4722, "encoder_q-layer.1": 938.6756, "encoder_q-layer.10": 1116.1425, "encoder_q-layer.11": 2763.6333, "encoder_q-layer.2": 1072.0798, "encoder_q-layer.3": 1105.3905, "encoder_q-layer.4": 1104.0516, "encoder_q-layer.5": 1108.3347, "encoder_q-layer.6": 1309.3329, "encoder_q-layer.7": 1328.7561, "encoder_q-layer.8": 1369.9414, "encoder_q-layer.9": 1119.0492, "epoch": 0.48, "inbatch_neg_score": 0.2097, "inbatch_pos_score": 0.8486, "learning_rate": 2.8055555555555557e-05, "loss": 3.4928, "norm_diff": 0.044, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 14.5728, "num_token_query": 37.3671, "num_token_union": 65.4926, "num_word_context": 202.608, "num_word_doc": 49.9082, "num_word_query": 27.9708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2066.9704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2095, "query_norm": 1.3605, "queue_k_norm": 1.4054, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3671, "sent_len_1": 66.883, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7825, "stdk": 0.0482, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.4876, "doc_norm": 1.4059, "encoder_q-embeddings": 1193.5068, "encoder_q-layer.0": 766.266, "encoder_q-layer.1": 819.1419, "encoder_q-layer.10": 1141.7324, "encoder_q-layer.11": 3009.0557, "encoder_q-layer.2": 921.4651, "encoder_q-layer.3": 1005.4431, "encoder_q-layer.4": 1071.7582, "encoder_q-layer.5": 1024.0172, "encoder_q-layer.6": 1125.9736, "encoder_q-layer.7": 1185.943, "encoder_q-layer.8": 1381.2694, "encoder_q-layer.9": 1166.4039, "epoch": 0.48, "inbatch_neg_score": 0.2125, "inbatch_pos_score": 0.8535, "learning_rate": 2.8000000000000003e-05, "loss": 3.4876, "norm_diff": 0.0335, "norm_loss": 0.0, "num_token_doc": 66.9736, "num_token_overlap": 14.6292, "num_token_query": 37.4188, "num_token_union": 65.5547, "num_word_context": 202.4334, "num_word_doc": 49.9541, "num_word_query": 28.0039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2005.0454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2122, "query_norm": 1.3723, "queue_k_norm": 1.4057, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4188, "sent_len_1": 66.9736, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.775, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.5106, "doc_norm": 1.4103, "encoder_q-embeddings": 1350.4346, "encoder_q-layer.0": 926.8129, "encoder_q-layer.1": 993.647, "encoder_q-layer.10": 1165.9799, "encoder_q-layer.11": 2923.9639, "encoder_q-layer.2": 1138.7395, "encoder_q-layer.3": 1260.255, "encoder_q-layer.4": 1445.5776, "encoder_q-layer.5": 1407.9277, "encoder_q-layer.6": 1487.3617, "encoder_q-layer.7": 1637.2806, "encoder_q-layer.8": 1563.8547, "encoder_q-layer.9": 1138.7556, "epoch": 0.49, "inbatch_neg_score": 0.2163, "inbatch_pos_score": 0.8628, "learning_rate": 2.7944444444444445e-05, "loss": 3.5106, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.9194, "num_token_overlap": 14.5549, "num_token_query": 37.1217, "num_token_union": 65.4139, "num_word_context": 202.1182, "num_word_doc": 49.9605, "num_word_query": 27.798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2261.4472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.215, "query_norm": 1.361, "queue_k_norm": 1.4068, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1217, "sent_len_1": 66.9194, "sent_len_max_0": 127.995, "sent_len_max_1": 188.8988, "stdk": 0.0484, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.5033, "doc_norm": 1.4104, "encoder_q-embeddings": 1112.4755, "encoder_q-layer.0": 757.0972, "encoder_q-layer.1": 804.5612, "encoder_q-layer.10": 1159.408, "encoder_q-layer.11": 2927.2805, "encoder_q-layer.2": 896.9981, "encoder_q-layer.3": 930.7857, "encoder_q-layer.4": 999.805, "encoder_q-layer.5": 980.8254, "encoder_q-layer.6": 1068.2715, "encoder_q-layer.7": 1117.1965, "encoder_q-layer.8": 1291.6064, "encoder_q-layer.9": 1123.5645, "epoch": 0.49, "inbatch_neg_score": 0.2149, "inbatch_pos_score": 0.8579, "learning_rate": 2.788888888888889e-05, "loss": 3.5033, "norm_diff": 0.0658, "norm_loss": 0.0, "num_token_doc": 66.9331, "num_token_overlap": 14.5439, "num_token_query": 37.2222, "num_token_union": 65.4221, "num_word_context": 202.4396, "num_word_doc": 49.89, "num_word_query": 27.8592, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1923.5926, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2136, "query_norm": 1.3446, "queue_k_norm": 1.4098, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2222, "sent_len_1": 66.9331, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6738, "stdk": 0.0483, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.5252, "doc_norm": 1.4026, "encoder_q-embeddings": 1860.7645, "encoder_q-layer.0": 1404.7081, "encoder_q-layer.1": 1494.2883, "encoder_q-layer.10": 1112.5676, "encoder_q-layer.11": 2890.5664, "encoder_q-layer.2": 1603.5046, "encoder_q-layer.3": 1643.0793, "encoder_q-layer.4": 1641.4427, "encoder_q-layer.5": 1575.8087, "encoder_q-layer.6": 1664.1104, "encoder_q-layer.7": 1505.1467, "encoder_q-layer.8": 1442.8218, "encoder_q-layer.9": 1113.9376, "epoch": 0.49, "inbatch_neg_score": 0.2053, "inbatch_pos_score": 0.8184, "learning_rate": 2.7833333333333333e-05, "loss": 3.5252, "norm_diff": 0.0836, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 14.5094, "num_token_query": 37.1937, "num_token_union": 65.3603, "num_word_context": 202.3592, "num_word_doc": 49.7811, "num_word_query": 27.818, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2507.943, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2054, "query_norm": 1.319, "queue_k_norm": 1.4092, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1937, "sent_len_1": 66.7393, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8237, "stdk": 0.048, "stdq": 0.0436, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.5093, "doc_norm": 1.4044, "encoder_q-embeddings": 4280.167, "encoder_q-layer.0": 3211.1951, "encoder_q-layer.1": 3901.6926, "encoder_q-layer.10": 1209.2134, "encoder_q-layer.11": 3121.6567, "encoder_q-layer.2": 4718.2578, "encoder_q-layer.3": 4868.6875, "encoder_q-layer.4": 4851.6641, "encoder_q-layer.5": 4647.0117, "encoder_q-layer.6": 4148.2021, "encoder_q-layer.7": 4052.3584, "encoder_q-layer.8": 2826.6763, "encoder_q-layer.9": 1270.6199, "epoch": 0.49, "inbatch_neg_score": 0.1985, "inbatch_pos_score": 0.8271, "learning_rate": 2.777777777777778e-05, "loss": 3.5093, "norm_diff": 0.0823, "norm_loss": 0.0, "num_token_doc": 66.7353, "num_token_overlap": 14.5442, "num_token_query": 37.1258, "num_token_union": 65.2897, "num_word_context": 201.8495, "num_word_doc": 49.8226, "num_word_query": 27.775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5937.6858, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1996, "query_norm": 1.322, "queue_k_norm": 1.409, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1258, "sent_len_1": 66.7353, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.5412, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 28.8395, "dev_samples_per_second": 2.219, "dev_steps_per_second": 0.035, "epoch": 0.49, "step": 50000, "test_accuracy": 93.00537109375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.40966591238975525, "test_doc_norm": 1.3894221782684326, "test_inbatch_neg_score": 0.5430465936660767, "test_inbatch_pos_score": 1.4786779880523682, "test_loss": 0.40966591238975525, "test_loss_align": 1.0900835990905762, "test_loss_unif": 3.9094038009643555, "test_loss_unif_q@queue": 3.9094038009643555, "test_norm_diff": 0.05377943068742752, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.18999195098876953, "test_query_norm": 1.4432017803192139, "test_queue_k_norm": 1.4086012840270996, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04253411293029785, "test_stdq": 0.04276827722787857, "test_stdqueue_k": 0.04835142940282822, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.8395, "dev_samples_per_second": 2.219, "dev_steps_per_second": 0.035, "epoch": 0.49, "eval_beir-arguana_ndcg@10": 0.36213, "eval_beir-arguana_recall@10": 0.61095, "eval_beir-arguana_recall@100": 0.90398, "eval_beir-arguana_recall@20": 0.74182, "eval_beir-avg_ndcg@10": 0.36767525, "eval_beir-avg_recall@10": 0.43581833333333336, "eval_beir-avg_recall@100": 0.6157359166666666, "eval_beir-avg_recall@20": 0.4974310833333334, "eval_beir-cqadupstack_ndcg@10": 0.25434249999999997, "eval_beir-cqadupstack_recall@10": 0.34647333333333324, "eval_beir-cqadupstack_recall@100": 0.5735891666666666, "eval_beir-cqadupstack_recall@20": 0.4132308333333334, "eval_beir-fiqa_ndcg@10": 0.22374, "eval_beir-fiqa_recall@10": 0.27482, "eval_beir-fiqa_recall@100": 0.54878, "eval_beir-fiqa_recall@20": 0.36037, "eval_beir-nfcorpus_ndcg@10": 0.29396, "eval_beir-nfcorpus_recall@10": 0.14481, "eval_beir-nfcorpus_recall@100": 0.28535, "eval_beir-nfcorpus_recall@20": 0.17441, "eval_beir-nq_ndcg@10": 0.25488, "eval_beir-nq_recall@10": 0.42142, "eval_beir-nq_recall@100": 0.75782, "eval_beir-nq_recall@20": 0.5442, "eval_beir-quora_ndcg@10": 0.75066, "eval_beir-quora_recall@10": 0.86697, "eval_beir-quora_recall@100": 0.97225, "eval_beir-quora_recall@20": 0.91465, "eval_beir-scidocs_ndcg@10": 0.13906, "eval_beir-scidocs_recall@10": 0.14678, "eval_beir-scidocs_recall@100": 0.34282, "eval_beir-scidocs_recall@20": 0.20443, "eval_beir-scifact_ndcg@10": 0.61596, "eval_beir-scifact_recall@10": 0.76872, "eval_beir-scifact_recall@100": 0.89322, "eval_beir-scifact_recall@20": 0.82844, "eval_beir-trec-covid_ndcg@10": 0.57975, "eval_beir-trec-covid_recall@10": 0.634, "eval_beir-trec-covid_recall@100": 0.4508, "eval_beir-trec-covid_recall@20": 0.584, "eval_beir-webis-touche2020_ndcg@10": 0.20227, "eval_beir-webis-touche2020_recall@10": 0.14324, "eval_beir-webis-touche2020_recall@100": 0.42875, "eval_beir-webis-touche2020_recall@20": 0.20876, "eval_senteval-avg_sts": 0.7538106626426473, "eval_senteval-sickr_spearman": 0.7214449151079843, "eval_senteval-stsb_spearman": 0.7861764101773102, "step": 50000, "test_accuracy": 93.00537109375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.40966591238975525, "test_doc_norm": 1.3894221782684326, "test_inbatch_neg_score": 0.5430465936660767, "test_inbatch_pos_score": 1.4786779880523682, "test_loss": 0.40966591238975525, "test_loss_align": 1.0900835990905762, "test_loss_unif": 3.9094038009643555, "test_loss_unif_q@queue": 3.9094038009643555, "test_norm_diff": 0.05377943068742752, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.18999195098876953, "test_query_norm": 1.4432017803192139, "test_queue_k_norm": 1.4086012840270996, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04253411293029785, "test_stdq": 0.04276827722787857, "test_stdqueue_k": 0.04835142940282822, "test_stdqueue_q": 0.0 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.5141, "doc_norm": 1.4128, "encoder_q-embeddings": 1195.6184, "encoder_q-layer.0": 828.0624, "encoder_q-layer.1": 888.1093, "encoder_q-layer.10": 1341.4921, "encoder_q-layer.11": 3053.5928, "encoder_q-layer.2": 1062.729, "encoder_q-layer.3": 1096.1853, "encoder_q-layer.4": 1086.8961, "encoder_q-layer.5": 1032.9053, "encoder_q-layer.6": 1112.9005, "encoder_q-layer.7": 1255.7152, "encoder_q-layer.8": 1399.0396, "encoder_q-layer.9": 1244.8916, "epoch": 0.49, "inbatch_neg_score": 0.1951, "inbatch_pos_score": 0.8457, "learning_rate": 2.772222222222222e-05, "loss": 3.5141, "norm_diff": 0.0739, "norm_loss": 0.0, "num_token_doc": 66.9825, "num_token_overlap": 14.562, "num_token_query": 37.2663, "num_token_union": 65.4529, "num_word_context": 202.6428, "num_word_doc": 50.013, "num_word_query": 27.9267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.9063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1969, "query_norm": 1.3389, "queue_k_norm": 1.4105, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2663, "sent_len_1": 66.9825, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4979, "doc_norm": 1.4122, "encoder_q-embeddings": 1043.0642, "encoder_q-layer.0": 669.0154, "encoder_q-layer.1": 692.1092, "encoder_q-layer.10": 1200.8231, "encoder_q-layer.11": 2969.9419, "encoder_q-layer.2": 769.2218, "encoder_q-layer.3": 783.2499, "encoder_q-layer.4": 825.4865, "encoder_q-layer.5": 838.3724, "encoder_q-layer.6": 988.7681, "encoder_q-layer.7": 1090.8805, "encoder_q-layer.8": 1338.5594, "encoder_q-layer.9": 1210.2544, "epoch": 0.49, "inbatch_neg_score": 0.1994, "inbatch_pos_score": 0.8267, "learning_rate": 2.7666666666666667e-05, "loss": 3.4979, "norm_diff": 0.0926, "norm_loss": 0.0, "num_token_doc": 66.8826, "num_token_overlap": 14.5713, "num_token_query": 37.2937, "num_token_union": 65.4525, "num_word_context": 202.5148, "num_word_doc": 49.9321, "num_word_query": 27.9403, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1916.5668, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1989, "query_norm": 1.3196, "queue_k_norm": 1.4116, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2937, "sent_len_1": 66.8826, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.9075, "stdk": 0.0483, "stdq": 0.0439, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4799, "doc_norm": 1.4128, "encoder_q-embeddings": 1290.2546, "encoder_q-layer.0": 877.7415, "encoder_q-layer.1": 962.7773, "encoder_q-layer.10": 1104.423, "encoder_q-layer.11": 2920.5808, "encoder_q-layer.2": 1125.8671, "encoder_q-layer.3": 1168.6193, "encoder_q-layer.4": 1392.9313, "encoder_q-layer.5": 1280.3539, "encoder_q-layer.6": 1226.2399, "encoder_q-layer.7": 1389.7305, "encoder_q-layer.8": 1369.5875, "encoder_q-layer.9": 1090.4678, "epoch": 0.49, "inbatch_neg_score": 0.1949, "inbatch_pos_score": 0.8179, "learning_rate": 2.761111111111111e-05, "loss": 3.4799, "norm_diff": 0.105, "norm_loss": 0.0, "num_token_doc": 66.5537, "num_token_overlap": 14.5748, "num_token_query": 37.4464, "num_token_union": 65.358, "num_word_context": 202.3146, "num_word_doc": 49.6807, "num_word_query": 28.0542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2138.6777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1946, "query_norm": 1.3077, "queue_k_norm": 1.4111, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4464, "sent_len_1": 66.5537, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9525, "stdk": 0.0484, "stdq": 0.0435, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4922, "doc_norm": 1.4029, "encoder_q-embeddings": 1117.0623, "encoder_q-layer.0": 761.7696, "encoder_q-layer.1": 826.5018, "encoder_q-layer.10": 1142.0464, "encoder_q-layer.11": 2917.96, "encoder_q-layer.2": 880.2371, "encoder_q-layer.3": 910.7816, "encoder_q-layer.4": 964.3969, "encoder_q-layer.5": 1006.7987, "encoder_q-layer.6": 1079.4182, "encoder_q-layer.7": 1199.7941, "encoder_q-layer.8": 1368.4132, "encoder_q-layer.9": 1162.7666, "epoch": 0.49, "inbatch_neg_score": 0.19, "inbatch_pos_score": 0.8247, "learning_rate": 2.7555555555555555e-05, "loss": 3.4922, "norm_diff": 0.0926, "norm_loss": 0.0, "num_token_doc": 66.8558, "num_token_overlap": 14.5743, "num_token_query": 37.2616, "num_token_union": 65.4009, "num_word_context": 202.4635, "num_word_doc": 49.9034, "num_word_query": 27.9274, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1982.0638, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1887, "query_norm": 1.3103, "queue_k_norm": 1.4115, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2616, "sent_len_1": 66.8558, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.625, "stdk": 0.048, "stdq": 0.0438, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5041, "doc_norm": 1.4064, "encoder_q-embeddings": 1324.2195, "encoder_q-layer.0": 935.0692, "encoder_q-layer.1": 1039.3949, "encoder_q-layer.10": 1278.8328, "encoder_q-layer.11": 3105.6296, "encoder_q-layer.2": 1255.3623, "encoder_q-layer.3": 1305.0017, "encoder_q-layer.4": 1377.2137, "encoder_q-layer.5": 1300.9495, "encoder_q-layer.6": 1308.9493, "encoder_q-layer.7": 1323.9607, "encoder_q-layer.8": 1447.29, "encoder_q-layer.9": 1253.1178, "epoch": 0.49, "inbatch_neg_score": 0.1799, "inbatch_pos_score": 0.8223, "learning_rate": 2.7500000000000004e-05, "loss": 3.5041, "norm_diff": 0.0689, "norm_loss": 0.0, "num_token_doc": 66.7616, "num_token_overlap": 14.5898, "num_token_query": 37.3702, "num_token_union": 65.4173, "num_word_context": 202.2209, "num_word_doc": 49.8224, "num_word_query": 27.9945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2220.9685, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1808, "query_norm": 1.3375, "queue_k_norm": 1.4089, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3702, "sent_len_1": 66.7616, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7763, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.4965, "doc_norm": 1.4132, "encoder_q-embeddings": 1311.8838, "encoder_q-layer.0": 912.8047, "encoder_q-layer.1": 1022.7392, "encoder_q-layer.10": 1088.701, "encoder_q-layer.11": 2901.1099, "encoder_q-layer.2": 1251.046, "encoder_q-layer.3": 1339.6898, "encoder_q-layer.4": 1507.1698, "encoder_q-layer.5": 1316.6743, "encoder_q-layer.6": 1339.793, "encoder_q-layer.7": 1494.4968, "encoder_q-layer.8": 1575.4434, "encoder_q-layer.9": 1258.6384, "epoch": 0.49, "inbatch_neg_score": 0.183, "inbatch_pos_score": 0.8184, "learning_rate": 2.7444444444444443e-05, "loss": 3.4965, "norm_diff": 0.1059, "norm_loss": 0.0, "num_token_doc": 66.7366, "num_token_overlap": 14.6065, "num_token_query": 37.296, "num_token_union": 65.3068, "num_word_context": 202.1532, "num_word_doc": 49.8251, "num_word_query": 27.9255, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2207.7944, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1829, "query_norm": 1.3073, "queue_k_norm": 1.4096, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.296, "sent_len_1": 66.7366, "sent_len_max_0": 128.0, "sent_len_max_1": 188.895, "stdk": 0.0484, "stdq": 0.0436, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.4719, "doc_norm": 1.4089, "encoder_q-embeddings": 1185.2979, "encoder_q-layer.0": 792.9692, "encoder_q-layer.1": 843.0334, "encoder_q-layer.10": 1165.9177, "encoder_q-layer.11": 2817.762, "encoder_q-layer.2": 891.9668, "encoder_q-layer.3": 938.7556, "encoder_q-layer.4": 957.7061, "encoder_q-layer.5": 961.2782, "encoder_q-layer.6": 1026.4895, "encoder_q-layer.7": 1069.5898, "encoder_q-layer.8": 1275.485, "encoder_q-layer.9": 1092.7467, "epoch": 0.49, "inbatch_neg_score": 0.1766, "inbatch_pos_score": 0.8193, "learning_rate": 2.7388888888888892e-05, "loss": 3.4719, "norm_diff": 0.0864, "norm_loss": 0.0, "num_token_doc": 66.7703, "num_token_overlap": 14.6422, "num_token_query": 37.3582, "num_token_union": 65.2831, "num_word_context": 202.5956, "num_word_doc": 49.8209, "num_word_query": 27.967, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1930.5972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1772, "query_norm": 1.3225, "queue_k_norm": 1.4089, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3582, "sent_len_1": 66.7703, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9875, "stdk": 0.0483, "stdq": 0.0443, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4905, "doc_norm": 1.4061, "encoder_q-embeddings": 576.6181, "encoder_q-layer.0": 382.7585, "encoder_q-layer.1": 403.6432, "encoder_q-layer.10": 586.6053, "encoder_q-layer.11": 1443.1099, "encoder_q-layer.2": 464.3289, "encoder_q-layer.3": 482.6979, "encoder_q-layer.4": 512.2728, "encoder_q-layer.5": 530.1501, "encoder_q-layer.6": 537.2454, "encoder_q-layer.7": 592.0739, "encoder_q-layer.8": 696.4651, "encoder_q-layer.9": 599.983, "epoch": 0.5, "inbatch_neg_score": 0.1763, "inbatch_pos_score": 0.8032, "learning_rate": 2.733333333333333e-05, "loss": 3.4905, "norm_diff": 0.0934, "norm_loss": 0.0, "num_token_doc": 66.9235, "num_token_overlap": 14.6062, "num_token_query": 37.276, "num_token_union": 65.4104, "num_word_context": 202.4098, "num_word_doc": 49.9276, "num_word_query": 27.9209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 964.1939, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1757, "query_norm": 1.3128, "queue_k_norm": 1.4088, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.276, "sent_len_1": 66.9235, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.9975, "stdk": 0.0482, "stdq": 0.0438, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4814, "doc_norm": 1.4079, "encoder_q-embeddings": 1018.9221, "encoder_q-layer.0": 717.2776, "encoder_q-layer.1": 758.3184, "encoder_q-layer.10": 667.6338, "encoder_q-layer.11": 1422.8577, "encoder_q-layer.2": 845.848, "encoder_q-layer.3": 861.899, "encoder_q-layer.4": 810.3896, "encoder_q-layer.5": 742.4116, "encoder_q-layer.6": 785.8592, "encoder_q-layer.7": 693.7995, "encoder_q-layer.8": 784.0428, "encoder_q-layer.9": 616.4658, "epoch": 0.5, "inbatch_neg_score": 0.1671, "inbatch_pos_score": 0.8218, "learning_rate": 2.727777777777778e-05, "loss": 3.4814, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.6803, "num_token_overlap": 14.6172, "num_token_query": 37.5841, "num_token_union": 65.4909, "num_word_context": 202.7568, "num_word_doc": 49.78, "num_word_query": 28.1569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1269.1873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1677, "query_norm": 1.3504, "queue_k_norm": 1.4103, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5841, "sent_len_1": 66.6803, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9338, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.5021, "doc_norm": 1.4066, "encoder_q-embeddings": 677.1413, "encoder_q-layer.0": 474.049, "encoder_q-layer.1": 505.5866, "encoder_q-layer.10": 597.524, "encoder_q-layer.11": 1456.7085, "encoder_q-layer.2": 554.3695, "encoder_q-layer.3": 585.8449, "encoder_q-layer.4": 586.4265, "encoder_q-layer.5": 559.8263, "encoder_q-layer.6": 619.1182, "encoder_q-layer.7": 641.6512, "encoder_q-layer.8": 670.067, "encoder_q-layer.9": 564.6248, "epoch": 0.5, "inbatch_neg_score": 0.1741, "inbatch_pos_score": 0.8154, "learning_rate": 2.7222222222222223e-05, "loss": 3.5021, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.9885, "num_token_overlap": 14.5516, "num_token_query": 37.2021, "num_token_union": 65.5132, "num_word_context": 202.6617, "num_word_doc": 50.0264, "num_word_query": 27.8628, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1044.2148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1726, "query_norm": 1.3242, "queue_k_norm": 1.4081, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2021, "sent_len_1": 66.9885, "sent_len_max_0": 128.0, "sent_len_max_1": 188.41, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.4908, "doc_norm": 1.4161, "encoder_q-embeddings": 540.7896, "encoder_q-layer.0": 342.6113, "encoder_q-layer.1": 352.8054, "encoder_q-layer.10": 623.9066, "encoder_q-layer.11": 1463.6119, "encoder_q-layer.2": 391.8907, "encoder_q-layer.3": 390.5192, "encoder_q-layer.4": 393.6641, "encoder_q-layer.5": 382.7103, "encoder_q-layer.6": 462.6735, "encoder_q-layer.7": 503.0516, "encoder_q-layer.8": 627.8349, "encoder_q-layer.9": 575.4733, "epoch": 0.5, "inbatch_neg_score": 0.1652, "inbatch_pos_score": 0.7954, "learning_rate": 2.716666666666667e-05, "loss": 3.4908, "norm_diff": 0.0816, "norm_loss": 0.0, "num_token_doc": 66.6928, "num_token_overlap": 14.5786, "num_token_query": 37.235, "num_token_union": 65.2804, "num_word_context": 201.9378, "num_word_doc": 49.7685, "num_word_query": 27.861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 924.3625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1661, "query_norm": 1.3345, "queue_k_norm": 1.406, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.235, "sent_len_1": 66.6928, "sent_len_max_0": 128.0, "sent_len_max_1": 188.645, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4698, "doc_norm": 1.4078, "encoder_q-embeddings": 592.6345, "encoder_q-layer.0": 402.8358, "encoder_q-layer.1": 428.0094, "encoder_q-layer.10": 594.5399, "encoder_q-layer.11": 1440.5098, "encoder_q-layer.2": 485.7179, "encoder_q-layer.3": 517.3928, "encoder_q-layer.4": 497.3903, "encoder_q-layer.5": 482.6228, "encoder_q-layer.6": 557.5801, "encoder_q-layer.7": 566.8979, "encoder_q-layer.8": 613.5631, "encoder_q-layer.9": 573.2827, "epoch": 0.5, "inbatch_neg_score": 0.1742, "inbatch_pos_score": 0.8066, "learning_rate": 2.7111111111111114e-05, "loss": 3.4698, "norm_diff": 0.0596, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 14.6673, "num_token_query": 37.4584, "num_token_union": 65.4733, "num_word_context": 202.2714, "num_word_doc": 49.8574, "num_word_query": 28.0018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 966.2633, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1732, "query_norm": 1.3481, "queue_k_norm": 1.4079, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4584, "sent_len_1": 66.883, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.8638, "stdk": 0.0483, "stdq": 0.0446, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.463, "doc_norm": 1.4054, "encoder_q-embeddings": 808.7178, "encoder_q-layer.0": 577.8417, "encoder_q-layer.1": 638.1351, "encoder_q-layer.10": 584.2321, "encoder_q-layer.11": 1462.6779, "encoder_q-layer.2": 781.6903, "encoder_q-layer.3": 753.2098, "encoder_q-layer.4": 783.3269, "encoder_q-layer.5": 774.0129, "encoder_q-layer.6": 694.8352, "encoder_q-layer.7": 681.9836, "encoder_q-layer.8": 657.3286, "encoder_q-layer.9": 559.5067, "epoch": 0.5, "inbatch_neg_score": 0.1713, "inbatch_pos_score": 0.8086, "learning_rate": 2.7055555555555557e-05, "loss": 3.463, "norm_diff": 0.0622, "norm_loss": 0.0, "num_token_doc": 66.8774, "num_token_overlap": 14.6804, "num_token_query": 37.5299, "num_token_union": 65.4529, "num_word_context": 202.4239, "num_word_doc": 49.8959, "num_word_query": 28.0854, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1180.7438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1726, "query_norm": 1.3432, "queue_k_norm": 1.4073, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5299, "sent_len_1": 66.8774, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.5188, "stdk": 0.0483, "stdq": 0.0441, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4883, "doc_norm": 1.4038, "encoder_q-embeddings": 987.5358, "encoder_q-layer.0": 696.7626, "encoder_q-layer.1": 727.5145, "encoder_q-layer.10": 627.5518, "encoder_q-layer.11": 1480.5266, "encoder_q-layer.2": 826.6201, "encoder_q-layer.3": 890.9506, "encoder_q-layer.4": 888.6271, "encoder_q-layer.5": 933.7136, "encoder_q-layer.6": 963.7888, "encoder_q-layer.7": 863.7783, "encoder_q-layer.8": 719.6619, "encoder_q-layer.9": 591.2809, "epoch": 0.5, "inbatch_neg_score": 0.1763, "inbatch_pos_score": 0.8159, "learning_rate": 2.7000000000000002e-05, "loss": 3.4883, "norm_diff": 0.0456, "norm_loss": 0.0, "num_token_doc": 66.6725, "num_token_overlap": 14.5043, "num_token_query": 37.2202, "num_token_union": 65.3397, "num_word_context": 202.4152, "num_word_doc": 49.7729, "num_word_query": 27.8722, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1343.993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.177, "query_norm": 1.3581, "queue_k_norm": 1.4074, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2202, "sent_len_1": 66.6725, "sent_len_max_0": 127.995, "sent_len_max_1": 188.22, "stdk": 0.0482, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.4734, "doc_norm": 1.4074, "encoder_q-embeddings": 745.3099, "encoder_q-layer.0": 514.0923, "encoder_q-layer.1": 577.8738, "encoder_q-layer.10": 613.3512, "encoder_q-layer.11": 1481.2131, "encoder_q-layer.2": 699.4747, "encoder_q-layer.3": 821.4526, "encoder_q-layer.4": 954.6248, "encoder_q-layer.5": 1036.4751, "encoder_q-layer.6": 1024.4855, "encoder_q-layer.7": 972.6302, "encoder_q-layer.8": 785.7048, "encoder_q-layer.9": 571.0743, "epoch": 0.5, "inbatch_neg_score": 0.1806, "inbatch_pos_score": 0.7983, "learning_rate": 2.6944444444444445e-05, "loss": 3.4734, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.6688, "num_token_overlap": 14.6489, "num_token_query": 37.5574, "num_token_union": 65.4395, "num_word_context": 202.5894, "num_word_doc": 49.8028, "num_word_query": 28.1243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1306.4465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1797, "query_norm": 1.3462, "queue_k_norm": 1.4065, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5574, "sent_len_1": 66.6688, "sent_len_max_0": 128.0, "sent_len_max_1": 186.375, "stdk": 0.0483, "stdq": 0.0437, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4784, "doc_norm": 1.4011, "encoder_q-embeddings": 892.2729, "encoder_q-layer.0": 604.8232, "encoder_q-layer.1": 574.3348, "encoder_q-layer.10": 607.3237, "encoder_q-layer.11": 1449.3571, "encoder_q-layer.2": 611.4819, "encoder_q-layer.3": 644.1887, "encoder_q-layer.4": 683.0444, "encoder_q-layer.5": 706.4928, "encoder_q-layer.6": 751.2581, "encoder_q-layer.7": 801.5913, "encoder_q-layer.8": 702.4951, "encoder_q-layer.9": 582.1508, "epoch": 0.5, "inbatch_neg_score": 0.1867, "inbatch_pos_score": 0.8145, "learning_rate": 2.688888888888889e-05, "loss": 3.4784, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.5802, "num_token_overlap": 14.6048, "num_token_query": 37.4944, "num_token_union": 65.3414, "num_word_context": 202.3889, "num_word_doc": 49.6987, "num_word_query": 28.0949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1172.1597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1855, "query_norm": 1.3532, "queue_k_norm": 1.4066, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4944, "sent_len_1": 66.5802, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.345, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4758, "doc_norm": 1.4072, "encoder_q-embeddings": 791.5286, "encoder_q-layer.0": 580.8879, "encoder_q-layer.1": 639.2565, "encoder_q-layer.10": 620.3939, "encoder_q-layer.11": 1475.5726, "encoder_q-layer.2": 741.3923, "encoder_q-layer.3": 824.7855, "encoder_q-layer.4": 882.5773, "encoder_q-layer.5": 1004.345, "encoder_q-layer.6": 924.8662, "encoder_q-layer.7": 768.5432, "encoder_q-layer.8": 695.8154, "encoder_q-layer.9": 628.6475, "epoch": 0.5, "inbatch_neg_score": 0.191, "inbatch_pos_score": 0.8311, "learning_rate": 2.6833333333333333e-05, "loss": 3.4758, "norm_diff": 0.0386, "norm_loss": 0.0, "num_token_doc": 66.584, "num_token_overlap": 14.6268, "num_token_query": 37.4716, "num_token_union": 65.264, "num_word_context": 201.8579, "num_word_doc": 49.6466, "num_word_query": 28.023, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1257.6697, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1907, "query_norm": 1.3686, "queue_k_norm": 1.4085, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4716, "sent_len_1": 66.584, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.2237, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4879, "doc_norm": 1.4114, "encoder_q-embeddings": 2129.749, "encoder_q-layer.0": 1478.652, "encoder_q-layer.1": 1572.0046, "encoder_q-layer.10": 637.2684, "encoder_q-layer.11": 1479.0297, "encoder_q-layer.2": 1818.5511, "encoder_q-layer.3": 1877.4941, "encoder_q-layer.4": 2189.2764, "encoder_q-layer.5": 2674.2766, "encoder_q-layer.6": 3436.9458, "encoder_q-layer.7": 3873.8618, "encoder_q-layer.8": 2313.4155, "encoder_q-layer.9": 820.7491, "epoch": 0.51, "inbatch_neg_score": 0.1999, "inbatch_pos_score": 0.8281, "learning_rate": 2.677777777777778e-05, "loss": 3.4879, "norm_diff": 0.0463, "norm_loss": 0.0, "num_token_doc": 66.7217, "num_token_overlap": 14.4912, "num_token_query": 37.1064, "num_token_union": 65.3256, "num_word_context": 202.0591, "num_word_doc": 49.7822, "num_word_query": 27.7791, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3298.9902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1984, "query_norm": 1.3651, "queue_k_norm": 1.4088, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1064, "sent_len_1": 66.7217, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.095, "stdk": 0.0485, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.4844, "doc_norm": 1.4012, "encoder_q-embeddings": 622.2187, "encoder_q-layer.0": 399.846, "encoder_q-layer.1": 435.8801, "encoder_q-layer.10": 597.4066, "encoder_q-layer.11": 1441.3589, "encoder_q-layer.2": 509.6486, "encoder_q-layer.3": 494.5564, "encoder_q-layer.4": 509.8333, "encoder_q-layer.5": 546.3559, "encoder_q-layer.6": 561.7705, "encoder_q-layer.7": 610.7036, "encoder_q-layer.8": 695.9498, "encoder_q-layer.9": 608.6114, "epoch": 0.51, "inbatch_neg_score": 0.2027, "inbatch_pos_score": 0.8477, "learning_rate": 2.6722222222222228e-05, "loss": 3.4844, "norm_diff": 0.027, "norm_loss": 0.0, "num_token_doc": 66.834, "num_token_overlap": 14.6094, "num_token_query": 37.288, "num_token_union": 65.454, "num_word_context": 202.4792, "num_word_doc": 49.8963, "num_word_query": 27.9201, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1009.6598, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2021, "query_norm": 1.385, "queue_k_norm": 1.408, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.288, "sent_len_1": 66.834, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4013, "stdk": 0.0481, "stdq": 0.0453, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.491, "doc_norm": 1.4068, "encoder_q-embeddings": 519.2843, "encoder_q-layer.0": 334.2671, "encoder_q-layer.1": 341.5507, "encoder_q-layer.10": 572.6145, "encoder_q-layer.11": 1479.0588, "encoder_q-layer.2": 364.926, "encoder_q-layer.3": 364.4595, "encoder_q-layer.4": 379.1066, "encoder_q-layer.5": 392.2246, "encoder_q-layer.6": 422.8345, "encoder_q-layer.7": 471.7432, "encoder_q-layer.8": 584.7399, "encoder_q-layer.9": 532.0253, "epoch": 0.51, "inbatch_neg_score": 0.2063, "inbatch_pos_score": 0.8311, "learning_rate": 2.6666666666666667e-05, "loss": 3.491, "norm_diff": 0.072, "norm_loss": 0.0, "num_token_doc": 67.0722, "num_token_overlap": 14.6097, "num_token_query": 37.3397, "num_token_union": 65.5528, "num_word_context": 202.5535, "num_word_doc": 49.9793, "num_word_query": 27.9443, "postclip_grad_norm": 1.0, "preclip_grad_norm": 916.2085, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.205, "query_norm": 1.3348, "queue_k_norm": 1.4085, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3397, "sent_len_1": 67.0722, "sent_len_max_0": 127.9838, "sent_len_max_1": 192.5275, "stdk": 0.0483, "stdq": 0.0436, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4837, "doc_norm": 1.4179, "encoder_q-embeddings": 835.9617, "encoder_q-layer.0": 565.2177, "encoder_q-layer.1": 639.2695, "encoder_q-layer.10": 583.6944, "encoder_q-layer.11": 1446.5485, "encoder_q-layer.2": 689.3884, "encoder_q-layer.3": 738.7766, "encoder_q-layer.4": 778.1631, "encoder_q-layer.5": 755.5538, "encoder_q-layer.6": 709.0887, "encoder_q-layer.7": 707.5698, "encoder_q-layer.8": 678.3953, "encoder_q-layer.9": 548.7383, "epoch": 0.51, "inbatch_neg_score": 0.2039, "inbatch_pos_score": 0.8389, "learning_rate": 2.6611111111111116e-05, "loss": 3.4837, "norm_diff": 0.0729, "norm_loss": 0.0, "num_token_doc": 66.779, "num_token_overlap": 14.6199, "num_token_query": 37.3735, "num_token_union": 65.3833, "num_word_context": 202.266, "num_word_doc": 49.8188, "num_word_query": 27.9993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1173.9, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2037, "query_norm": 1.345, "queue_k_norm": 1.411, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3735, "sent_len_1": 66.779, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8275, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4792, "doc_norm": 1.4095, "encoder_q-embeddings": 2683.6956, "encoder_q-layer.0": 2101.3181, "encoder_q-layer.1": 2333.0242, "encoder_q-layer.10": 619.3844, "encoder_q-layer.11": 1390.6252, "encoder_q-layer.2": 2839.9385, "encoder_q-layer.3": 3073.4656, "encoder_q-layer.4": 3860.0901, "encoder_q-layer.5": 3312.1663, "encoder_q-layer.6": 3159.6772, "encoder_q-layer.7": 2890.9031, "encoder_q-layer.8": 2104.8215, "encoder_q-layer.9": 900.2129, "epoch": 0.51, "inbatch_neg_score": 0.2049, "inbatch_pos_score": 0.8506, "learning_rate": 2.6555555555555555e-05, "loss": 3.4792, "norm_diff": 0.0732, "norm_loss": 0.0, "num_token_doc": 66.6933, "num_token_overlap": 14.5642, "num_token_query": 37.3136, "num_token_union": 65.3655, "num_word_context": 202.3609, "num_word_doc": 49.8237, "num_word_query": 27.9439, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3815.8335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2035, "query_norm": 1.3362, "queue_k_norm": 1.4108, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3136, "sent_len_1": 66.6933, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9675, "stdk": 0.0483, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4896, "doc_norm": 1.4141, "encoder_q-embeddings": 739.3622, "encoder_q-layer.0": 523.3152, "encoder_q-layer.1": 573.8258, "encoder_q-layer.10": 606.8864, "encoder_q-layer.11": 1614.9731, "encoder_q-layer.2": 680.5079, "encoder_q-layer.3": 690.1906, "encoder_q-layer.4": 679.0151, "encoder_q-layer.5": 652.8182, "encoder_q-layer.6": 661.7016, "encoder_q-layer.7": 665.2398, "encoder_q-layer.8": 710.2852, "encoder_q-layer.9": 594.1039, "epoch": 0.51, "inbatch_neg_score": 0.2047, "inbatch_pos_score": 0.8481, "learning_rate": 2.6500000000000004e-05, "loss": 3.4896, "norm_diff": 0.0782, "norm_loss": 0.0, "num_token_doc": 66.929, "num_token_overlap": 14.5789, "num_token_query": 37.343, "num_token_union": 65.4478, "num_word_context": 202.4508, "num_word_doc": 49.9079, "num_word_query": 27.9331, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1145.8104, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2042, "query_norm": 1.3359, "queue_k_norm": 1.4111, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.343, "sent_len_1": 66.929, "sent_len_max_0": 128.0, "sent_len_max_1": 192.85, "stdk": 0.0485, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.4711, "doc_norm": 1.4118, "encoder_q-embeddings": 589.2728, "encoder_q-layer.0": 407.3524, "encoder_q-layer.1": 423.9417, "encoder_q-layer.10": 572.9377, "encoder_q-layer.11": 1419.7751, "encoder_q-layer.2": 484.9275, "encoder_q-layer.3": 504.6598, "encoder_q-layer.4": 525.3959, "encoder_q-layer.5": 515.694, "encoder_q-layer.6": 559.9672, "encoder_q-layer.7": 626.9236, "encoder_q-layer.8": 631.5001, "encoder_q-layer.9": 532.8778, "epoch": 0.51, "inbatch_neg_score": 0.2023, "inbatch_pos_score": 0.8564, "learning_rate": 2.6444444444444443e-05, "loss": 3.4711, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.6399, "num_token_overlap": 14.6399, "num_token_query": 37.5023, "num_token_union": 65.4045, "num_word_context": 201.9269, "num_word_doc": 49.7175, "num_word_query": 28.1027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 979.9668, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2026, "query_norm": 1.315, "queue_k_norm": 1.4125, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5023, "sent_len_1": 66.6399, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4013, "stdk": 0.0484, "stdq": 0.0437, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4678, "doc_norm": 1.4179, "encoder_q-embeddings": 727.2966, "encoder_q-layer.0": 510.775, "encoder_q-layer.1": 566.0778, "encoder_q-layer.10": 540.0757, "encoder_q-layer.11": 1503.0192, "encoder_q-layer.2": 618.9207, "encoder_q-layer.3": 650.0322, "encoder_q-layer.4": 638.721, "encoder_q-layer.5": 670.2627, "encoder_q-layer.6": 708.0766, "encoder_q-layer.7": 719.7633, "encoder_q-layer.8": 685.9202, "encoder_q-layer.9": 562.5234, "epoch": 0.51, "inbatch_neg_score": 0.1988, "inbatch_pos_score": 0.8408, "learning_rate": 2.6388888888888892e-05, "loss": 3.4678, "norm_diff": 0.0951, "norm_loss": 0.0, "num_token_doc": 66.9042, "num_token_overlap": 14.7269, "num_token_query": 37.5706, "num_token_union": 65.4655, "num_word_context": 202.8128, "num_word_doc": 49.9423, "num_word_query": 28.1562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1125.8688, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1986, "query_norm": 1.3228, "queue_k_norm": 1.4113, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5706, "sent_len_1": 66.9042, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4863, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.4786, "doc_norm": 1.4172, "encoder_q-embeddings": 2171.9216, "encoder_q-layer.0": 1682.8125, "encoder_q-layer.1": 1792.1301, "encoder_q-layer.10": 548.98, "encoder_q-layer.11": 1422.3286, "encoder_q-layer.2": 1304.1039, "encoder_q-layer.3": 1045.1055, "encoder_q-layer.4": 995.8894, "encoder_q-layer.5": 961.7192, "encoder_q-layer.6": 940.2786, "encoder_q-layer.7": 877.458, "encoder_q-layer.8": 753.4934, "encoder_q-layer.9": 569.9097, "epoch": 0.51, "inbatch_neg_score": 0.1901, "inbatch_pos_score": 0.8115, "learning_rate": 2.633333333333333e-05, "loss": 3.4786, "norm_diff": 0.1178, "norm_loss": 0.0, "num_token_doc": 66.5736, "num_token_overlap": 14.5282, "num_token_query": 37.2897, "num_token_union": 65.3043, "num_word_context": 202.104, "num_word_doc": 49.6574, "num_word_query": 27.918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1989.891, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1901, "query_norm": 1.2994, "queue_k_norm": 1.4132, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2897, "sent_len_1": 66.5736, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4725, "stdk": 0.0486, "stdq": 0.0434, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.475, "doc_norm": 1.4145, "encoder_q-embeddings": 522.5087, "encoder_q-layer.0": 353.7895, "encoder_q-layer.1": 353.2907, "encoder_q-layer.10": 512.9905, "encoder_q-layer.11": 1426.2192, "encoder_q-layer.2": 385.1471, "encoder_q-layer.3": 405.4376, "encoder_q-layer.4": 415.6889, "encoder_q-layer.5": 422.0796, "encoder_q-layer.6": 452.7221, "encoder_q-layer.7": 501.224, "encoder_q-layer.8": 603.5801, "encoder_q-layer.9": 529.2363, "epoch": 0.51, "inbatch_neg_score": 0.1912, "inbatch_pos_score": 0.8218, "learning_rate": 2.627777777777778e-05, "loss": 3.475, "norm_diff": 0.1084, "norm_loss": 0.0, "num_token_doc": 66.9349, "num_token_overlap": 14.6054, "num_token_query": 37.3719, "num_token_union": 65.5349, "num_word_context": 202.4998, "num_word_doc": 49.9976, "num_word_query": 27.9864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 919.7743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1929, "query_norm": 1.3061, "queue_k_norm": 1.4109, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3719, "sent_len_1": 66.9349, "sent_len_max_0": 128.0, "sent_len_max_1": 187.72, "stdk": 0.0485, "stdq": 0.0436, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.4915, "doc_norm": 1.4143, "encoder_q-embeddings": 1158.6224, "encoder_q-layer.0": 785.0911, "encoder_q-layer.1": 848.6508, "encoder_q-layer.10": 1301.9722, "encoder_q-layer.11": 2974.3237, "encoder_q-layer.2": 945.1186, "encoder_q-layer.3": 996.4553, "encoder_q-layer.4": 995.3246, "encoder_q-layer.5": 1054.731, "encoder_q-layer.6": 1133.86, "encoder_q-layer.7": 1244.7783, "encoder_q-layer.8": 1375.692, "encoder_q-layer.9": 1234.9412, "epoch": 0.52, "inbatch_neg_score": 0.1888, "inbatch_pos_score": 0.8281, "learning_rate": 2.6222222222222226e-05, "loss": 3.4915, "norm_diff": 0.0853, "norm_loss": 0.0, "num_token_doc": 66.716, "num_token_overlap": 14.5762, "num_token_query": 37.322, "num_token_union": 65.3226, "num_word_context": 202.312, "num_word_doc": 49.747, "num_word_query": 27.8866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2047.124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1881, "query_norm": 1.329, "queue_k_norm": 1.4122, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.322, "sent_len_1": 66.716, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3562, "stdk": 0.0484, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4751, "doc_norm": 1.4087, "encoder_q-embeddings": 1335.6674, "encoder_q-layer.0": 942.1726, "encoder_q-layer.1": 1011.0041, "encoder_q-layer.10": 1190.9563, "encoder_q-layer.11": 2984.854, "encoder_q-layer.2": 1189.79, "encoder_q-layer.3": 1170.183, "encoder_q-layer.4": 1221.7179, "encoder_q-layer.5": 1277.6172, "encoder_q-layer.6": 1113.6548, "encoder_q-layer.7": 1147.812, "encoder_q-layer.8": 1222.5559, "encoder_q-layer.9": 1084.8848, "epoch": 0.52, "inbatch_neg_score": 0.1842, "inbatch_pos_score": 0.8208, "learning_rate": 2.6166666666666668e-05, "loss": 3.4751, "norm_diff": 0.0936, "norm_loss": 0.0, "num_token_doc": 66.7945, "num_token_overlap": 14.5325, "num_token_query": 37.2688, "num_token_union": 65.4045, "num_word_context": 202.5168, "num_word_doc": 49.8515, "num_word_query": 27.9173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2091.5053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1848, "query_norm": 1.3151, "queue_k_norm": 1.4129, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2688, "sent_len_1": 66.7945, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.0888, "stdk": 0.0483, "stdq": 0.0441, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.4944, "doc_norm": 1.4129, "encoder_q-embeddings": 1200.2762, "encoder_q-layer.0": 823.8437, "encoder_q-layer.1": 898.2586, "encoder_q-layer.10": 1138.7722, "encoder_q-layer.11": 2776.8989, "encoder_q-layer.2": 1037.5396, "encoder_q-layer.3": 1109.9503, "encoder_q-layer.4": 1235.0128, "encoder_q-layer.5": 1346.1027, "encoder_q-layer.6": 1531.2939, "encoder_q-layer.7": 1588.7151, "encoder_q-layer.8": 1536.8789, "encoder_q-layer.9": 1171.9851, "epoch": 0.52, "inbatch_neg_score": 0.1868, "inbatch_pos_score": 0.8174, "learning_rate": 2.6111111111111114e-05, "loss": 3.4944, "norm_diff": 0.1136, "norm_loss": 0.0, "num_token_doc": 66.8164, "num_token_overlap": 14.4969, "num_token_query": 37.1102, "num_token_union": 65.2922, "num_word_context": 202.1461, "num_word_doc": 49.788, "num_word_query": 27.7516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2131.3004, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1863, "query_norm": 1.2993, "queue_k_norm": 1.4122, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1102, "sent_len_1": 66.8164, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.03, "stdk": 0.0484, "stdq": 0.0434, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.4878, "doc_norm": 1.4112, "encoder_q-embeddings": 1538.723, "encoder_q-layer.0": 1131.6237, "encoder_q-layer.1": 1189.0175, "encoder_q-layer.10": 1140.3175, "encoder_q-layer.11": 2749.8884, "encoder_q-layer.2": 1384.8712, "encoder_q-layer.3": 1430.2241, "encoder_q-layer.4": 1507.6799, "encoder_q-layer.5": 1318.9205, "encoder_q-layer.6": 1276.3733, "encoder_q-layer.7": 1293.2125, "encoder_q-layer.8": 1302.754, "encoder_q-layer.9": 1091.5396, "epoch": 0.52, "inbatch_neg_score": 0.1827, "inbatch_pos_score": 0.8281, "learning_rate": 2.6055555555555556e-05, "loss": 3.4878, "norm_diff": 0.1018, "norm_loss": 0.0, "num_token_doc": 66.5385, "num_token_overlap": 14.5648, "num_token_query": 37.378, "num_token_union": 65.2875, "num_word_context": 202.1359, "num_word_doc": 49.6384, "num_word_query": 27.9812, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2225.0673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.183, "query_norm": 1.3095, "queue_k_norm": 1.4114, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.378, "sent_len_1": 66.5385, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.3262, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.4897, "doc_norm": 1.414, "encoder_q-embeddings": 1036.6879, "encoder_q-layer.0": 662.05, "encoder_q-layer.1": 682.3784, "encoder_q-layer.10": 1141.8318, "encoder_q-layer.11": 2742.1165, "encoder_q-layer.2": 752.4897, "encoder_q-layer.3": 771.41, "encoder_q-layer.4": 805.1836, "encoder_q-layer.5": 765.8942, "encoder_q-layer.6": 891.0093, "encoder_q-layer.7": 961.1659, "encoder_q-layer.8": 1184.9248, "encoder_q-layer.9": 1144.4094, "epoch": 0.52, "inbatch_neg_score": 0.1759, "inbatch_pos_score": 0.8345, "learning_rate": 2.6000000000000002e-05, "loss": 3.4897, "norm_diff": 0.0987, "norm_loss": 0.0, "num_token_doc": 66.6409, "num_token_overlap": 14.5284, "num_token_query": 37.2423, "num_token_union": 65.2606, "num_word_context": 201.9214, "num_word_doc": 49.6841, "num_word_query": 27.8664, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1805.6329, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1776, "query_norm": 1.3152, "queue_k_norm": 1.4101, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2423, "sent_len_1": 66.6409, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7912, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.5038, "doc_norm": 1.4092, "encoder_q-embeddings": 1632.8867, "encoder_q-layer.0": 1126.0283, "encoder_q-layer.1": 1251.2362, "encoder_q-layer.10": 1184.2397, "encoder_q-layer.11": 3095.0149, "encoder_q-layer.2": 1404.2089, "encoder_q-layer.3": 1455.7224, "encoder_q-layer.4": 1398.8851, "encoder_q-layer.5": 1208.1052, "encoder_q-layer.6": 1207.028, "encoder_q-layer.7": 1231.6235, "encoder_q-layer.8": 1304.6445, "encoder_q-layer.9": 1218.9004, "epoch": 0.52, "inbatch_neg_score": 0.1805, "inbatch_pos_score": 0.7915, "learning_rate": 2.5944444444444444e-05, "loss": 3.5038, "norm_diff": 0.1016, "norm_loss": 0.0, "num_token_doc": 66.8912, "num_token_overlap": 14.5352, "num_token_query": 37.2666, "num_token_union": 65.4903, "num_word_context": 202.3563, "num_word_doc": 49.8672, "num_word_query": 27.8906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2309.165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1807, "query_norm": 1.3076, "queue_k_norm": 1.409, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2666, "sent_len_1": 66.8912, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1138, "stdk": 0.0483, "stdq": 0.0438, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4724, "doc_norm": 1.4177, "encoder_q-embeddings": 1464.781, "encoder_q-layer.0": 995.4853, "encoder_q-layer.1": 1127.8916, "encoder_q-layer.10": 1171.9255, "encoder_q-layer.11": 2983.2527, "encoder_q-layer.2": 1309.0979, "encoder_q-layer.3": 1386.2892, "encoder_q-layer.4": 1414.2416, "encoder_q-layer.5": 1433.3722, "encoder_q-layer.6": 1412.1447, "encoder_q-layer.7": 1469.571, "encoder_q-layer.8": 1282.8696, "encoder_q-layer.9": 1181.0265, "epoch": 0.52, "inbatch_neg_score": 0.1838, "inbatch_pos_score": 0.8413, "learning_rate": 2.588888888888889e-05, "loss": 3.4724, "norm_diff": 0.0981, "norm_loss": 0.0, "num_token_doc": 66.6608, "num_token_overlap": 14.5507, "num_token_query": 37.277, "num_token_union": 65.351, "num_word_context": 202.4243, "num_word_doc": 49.7786, "num_word_query": 27.9302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2276.7776, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1832, "query_norm": 1.3196, "queue_k_norm": 1.4091, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.277, "sent_len_1": 66.6608, "sent_len_max_0": 127.99, "sent_len_max_1": 190.5813, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.4649, "doc_norm": 1.4125, "encoder_q-embeddings": 1145.9402, "encoder_q-layer.0": 762.1034, "encoder_q-layer.1": 772.0246, "encoder_q-layer.10": 1137.8153, "encoder_q-layer.11": 2914.6123, "encoder_q-layer.2": 851.2546, "encoder_q-layer.3": 901.2203, "encoder_q-layer.4": 981.304, "encoder_q-layer.5": 950.4705, "encoder_q-layer.6": 1008.1716, "encoder_q-layer.7": 1135.713, "encoder_q-layer.8": 1266.2084, "encoder_q-layer.9": 1117.1099, "epoch": 0.52, "inbatch_neg_score": 0.1817, "inbatch_pos_score": 0.8252, "learning_rate": 2.5833333333333336e-05, "loss": 3.4649, "norm_diff": 0.09, "norm_loss": 0.0, "num_token_doc": 66.8629, "num_token_overlap": 14.6624, "num_token_query": 37.3885, "num_token_union": 65.4413, "num_word_context": 202.0249, "num_word_doc": 49.8612, "num_word_query": 27.9786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1951.0731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1823, "query_norm": 1.3225, "queue_k_norm": 1.4101, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3885, "sent_len_1": 66.8629, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4412, "stdk": 0.0485, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.4484, "doc_norm": 1.4094, "encoder_q-embeddings": 1189.9938, "encoder_q-layer.0": 788.5273, "encoder_q-layer.1": 845.8882, "encoder_q-layer.10": 1164.6229, "encoder_q-layer.11": 2843.1423, "encoder_q-layer.2": 954.5779, "encoder_q-layer.3": 1045.1117, "encoder_q-layer.4": 1100.9194, "encoder_q-layer.5": 1098.1637, "encoder_q-layer.6": 1071.8351, "encoder_q-layer.7": 1187.528, "encoder_q-layer.8": 1374.2097, "encoder_q-layer.9": 1232.7599, "epoch": 0.52, "inbatch_neg_score": 0.1758, "inbatch_pos_score": 0.8184, "learning_rate": 2.5777777777777778e-05, "loss": 3.4484, "norm_diff": 0.0712, "norm_loss": 0.0, "num_token_doc": 66.7767, "num_token_overlap": 14.5838, "num_token_query": 37.2817, "num_token_union": 65.3561, "num_word_context": 202.129, "num_word_doc": 49.8774, "num_word_query": 27.907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2024.2998, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1755, "query_norm": 1.3382, "queue_k_norm": 1.4079, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2817, "sent_len_1": 66.7767, "sent_len_max_0": 128.0, "sent_len_max_1": 189.515, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.488, "doc_norm": 1.4025, "encoder_q-embeddings": 1199.5256, "encoder_q-layer.0": 824.4774, "encoder_q-layer.1": 858.2903, "encoder_q-layer.10": 1140.8645, "encoder_q-layer.11": 2884.7046, "encoder_q-layer.2": 981.8428, "encoder_q-layer.3": 1031.2247, "encoder_q-layer.4": 1086.9199, "encoder_q-layer.5": 1074.2942, "encoder_q-layer.6": 1218.985, "encoder_q-layer.7": 1244.0627, "encoder_q-layer.8": 1341.3412, "encoder_q-layer.9": 1191.7158, "epoch": 0.52, "inbatch_neg_score": 0.1771, "inbatch_pos_score": 0.7959, "learning_rate": 2.5722222222222224e-05, "loss": 3.488, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.5837, "num_token_overlap": 14.5731, "num_token_query": 37.2836, "num_token_union": 65.2046, "num_word_context": 202.1326, "num_word_doc": 49.679, "num_word_query": 27.9152, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2026.3983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.177, "query_norm": 1.3056, "queue_k_norm": 1.4089, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2836, "sent_len_1": 66.5837, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5312, "stdk": 0.0482, "stdq": 0.0437, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.4623, "doc_norm": 1.4052, "encoder_q-embeddings": 11929.1416, "encoder_q-layer.0": 8525.8857, "encoder_q-layer.1": 8966.8809, "encoder_q-layer.10": 1179.907, "encoder_q-layer.11": 2905.8765, "encoder_q-layer.2": 11700.4727, "encoder_q-layer.3": 12169.3145, "encoder_q-layer.4": 12690.1094, "encoder_q-layer.5": 10995.9033, "encoder_q-layer.6": 7365.582, "encoder_q-layer.7": 7280.3809, "encoder_q-layer.8": 4482.1562, "encoder_q-layer.9": 1718.2058, "epoch": 0.53, "inbatch_neg_score": 0.1807, "inbatch_pos_score": 0.813, "learning_rate": 2.5666666666666666e-05, "loss": 3.4623, "norm_diff": 0.0737, "norm_loss": 0.0, "num_token_doc": 66.5164, "num_token_overlap": 14.6246, "num_token_query": 37.3709, "num_token_union": 65.3049, "num_word_context": 201.9633, "num_word_doc": 49.6562, "num_word_query": 27.9589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13704.7727, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1802, "query_norm": 1.3315, "queue_k_norm": 1.4089, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3709, "sent_len_1": 66.5164, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5687, "stdk": 0.0482, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4813, "doc_norm": 1.4076, "encoder_q-embeddings": 1026.2277, "encoder_q-layer.0": 699.472, "encoder_q-layer.1": 712.4857, "encoder_q-layer.10": 1149.9373, "encoder_q-layer.11": 2824.8098, "encoder_q-layer.2": 782.3881, "encoder_q-layer.3": 831.2025, "encoder_q-layer.4": 838.604, "encoder_q-layer.5": 841.002, "encoder_q-layer.6": 898.562, "encoder_q-layer.7": 992.999, "encoder_q-layer.8": 1131.7976, "encoder_q-layer.9": 1091.4321, "epoch": 0.53, "inbatch_neg_score": 0.1851, "inbatch_pos_score": 0.8125, "learning_rate": 2.5611111111111115e-05, "loss": 3.4813, "norm_diff": 0.0884, "norm_loss": 0.0, "num_token_doc": 66.676, "num_token_overlap": 14.6325, "num_token_query": 37.4002, "num_token_union": 65.3067, "num_word_context": 202.1768, "num_word_doc": 49.7607, "num_word_query": 27.9942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1809.4913, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1846, "query_norm": 1.3192, "queue_k_norm": 1.4061, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4002, "sent_len_1": 66.676, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4462, "stdk": 0.0483, "stdq": 0.0439, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.4644, "doc_norm": 1.4105, "encoder_q-embeddings": 1331.4321, "encoder_q-layer.0": 866.1715, "encoder_q-layer.1": 944.9487, "encoder_q-layer.10": 1189.2815, "encoder_q-layer.11": 2956.0591, "encoder_q-layer.2": 1026.6906, "encoder_q-layer.3": 1005.6964, "encoder_q-layer.4": 1123.8025, "encoder_q-layer.5": 1062.9751, "encoder_q-layer.6": 1088.9554, "encoder_q-layer.7": 1151.0476, "encoder_q-layer.8": 1289.1426, "encoder_q-layer.9": 1169.225, "epoch": 0.53, "inbatch_neg_score": 0.1873, "inbatch_pos_score": 0.835, "learning_rate": 2.5555555555555554e-05, "loss": 3.4644, "norm_diff": 0.0703, "norm_loss": 0.0, "num_token_doc": 66.7383, "num_token_overlap": 14.5901, "num_token_query": 37.3646, "num_token_union": 65.3724, "num_word_context": 202.1893, "num_word_doc": 49.7992, "num_word_query": 27.9751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2032.7852, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1888, "query_norm": 1.3402, "queue_k_norm": 1.4062, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3646, "sent_len_1": 66.7383, "sent_len_max_0": 127.9838, "sent_len_max_1": 190.0625, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.4647, "doc_norm": 1.4104, "encoder_q-embeddings": 4167.2275, "encoder_q-layer.0": 3031.064, "encoder_q-layer.1": 3076.6255, "encoder_q-layer.10": 1174.8665, "encoder_q-layer.11": 2867.4602, "encoder_q-layer.2": 3479.5852, "encoder_q-layer.3": 3707.7463, "encoder_q-layer.4": 3964.0923, "encoder_q-layer.5": 4436.7251, "encoder_q-layer.6": 4130.8452, "encoder_q-layer.7": 3821.2024, "encoder_q-layer.8": 1916.8721, "encoder_q-layer.9": 1203.3292, "epoch": 0.53, "inbatch_neg_score": 0.1918, "inbatch_pos_score": 0.8262, "learning_rate": 2.5500000000000003e-05, "loss": 3.4647, "norm_diff": 0.0828, "norm_loss": 0.0, "num_token_doc": 66.6629, "num_token_overlap": 14.6808, "num_token_query": 37.5211, "num_token_union": 65.3409, "num_word_context": 202.2426, "num_word_doc": 49.7928, "num_word_query": 28.094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5138.3143, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.191, "query_norm": 1.3276, "queue_k_norm": 1.408, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5211, "sent_len_1": 66.6629, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2537, "stdk": 0.0484, "stdq": 0.0437, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.4673, "doc_norm": 1.4083, "encoder_q-embeddings": 1603.2501, "encoder_q-layer.0": 1211.8284, "encoder_q-layer.1": 1155.7983, "encoder_q-layer.10": 1112.9731, "encoder_q-layer.11": 2904.8064, "encoder_q-layer.2": 1507.4403, "encoder_q-layer.3": 1309.1002, "encoder_q-layer.4": 1208.3766, "encoder_q-layer.5": 1172.2614, "encoder_q-layer.6": 1107.859, "encoder_q-layer.7": 1176.3436, "encoder_q-layer.8": 1317.1885, "encoder_q-layer.9": 1097.4858, "epoch": 0.53, "inbatch_neg_score": 0.1926, "inbatch_pos_score": 0.8027, "learning_rate": 2.5444444444444442e-05, "loss": 3.4673, "norm_diff": 0.1031, "norm_loss": 0.0, "num_token_doc": 66.7806, "num_token_overlap": 14.5535, "num_token_query": 37.273, "num_token_union": 65.4028, "num_word_context": 202.3311, "num_word_doc": 49.8548, "num_word_query": 27.9128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2209.0063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1923, "query_norm": 1.3052, "queue_k_norm": 1.4075, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.273, "sent_len_1": 66.7806, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.035, "stdk": 0.0483, "stdq": 0.0429, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.4596, "doc_norm": 1.4123, "encoder_q-embeddings": 1341.4976, "encoder_q-layer.0": 966.7145, "encoder_q-layer.1": 956.2635, "encoder_q-layer.10": 1178.7653, "encoder_q-layer.11": 2936.678, "encoder_q-layer.2": 1012.9294, "encoder_q-layer.3": 1026.0492, "encoder_q-layer.4": 1052.7548, "encoder_q-layer.5": 1065.3602, "encoder_q-layer.6": 1057.8445, "encoder_q-layer.7": 1109.7769, "encoder_q-layer.8": 1336.4728, "encoder_q-layer.9": 1188.9832, "epoch": 0.53, "inbatch_neg_score": 0.1959, "inbatch_pos_score": 0.8574, "learning_rate": 2.538888888888889e-05, "loss": 3.4596, "norm_diff": 0.0604, "norm_loss": 0.0, "num_token_doc": 66.7243, "num_token_overlap": 14.5655, "num_token_query": 37.3106, "num_token_union": 65.3323, "num_word_context": 202.332, "num_word_doc": 49.7375, "num_word_query": 27.9162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2041.6677, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1954, "query_norm": 1.3519, "queue_k_norm": 1.4086, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3106, "sent_len_1": 66.7243, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.2175, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.4749, "doc_norm": 1.4087, "encoder_q-embeddings": 1398.1686, "encoder_q-layer.0": 990.0598, "encoder_q-layer.1": 1047.1315, "encoder_q-layer.10": 1126.9001, "encoder_q-layer.11": 2974.9331, "encoder_q-layer.2": 1169.3368, "encoder_q-layer.3": 1211.6434, "encoder_q-layer.4": 1284.4963, "encoder_q-layer.5": 1302.326, "encoder_q-layer.6": 1326.3295, "encoder_q-layer.7": 1281.1934, "encoder_q-layer.8": 1295.9243, "encoder_q-layer.9": 1140.7625, "epoch": 0.53, "inbatch_neg_score": 0.2034, "inbatch_pos_score": 0.8159, "learning_rate": 2.5333333333333337e-05, "loss": 3.4749, "norm_diff": 0.0785, "norm_loss": 0.0, "num_token_doc": 66.6727, "num_token_overlap": 14.6315, "num_token_query": 37.428, "num_token_union": 65.3265, "num_word_context": 202.0531, "num_word_doc": 49.7814, "num_word_query": 28.0608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2217.8, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2018, "query_norm": 1.3302, "queue_k_norm": 1.4084, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.428, "sent_len_1": 66.6727, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8613, "stdk": 0.0483, "stdq": 0.0437, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.4546, "doc_norm": 1.4048, "encoder_q-embeddings": 2150.8765, "encoder_q-layer.0": 1533.1874, "encoder_q-layer.1": 1618.4769, "encoder_q-layer.10": 1116.5251, "encoder_q-layer.11": 2759.5979, "encoder_q-layer.2": 1793.1498, "encoder_q-layer.3": 1980.7627, "encoder_q-layer.4": 1951.0994, "encoder_q-layer.5": 1820.1359, "encoder_q-layer.6": 1898.2396, "encoder_q-layer.7": 1918.3202, "encoder_q-layer.8": 1885.0319, "encoder_q-layer.9": 1266.3737, "epoch": 0.53, "inbatch_neg_score": 0.2041, "inbatch_pos_score": 0.8657, "learning_rate": 2.527777777777778e-05, "loss": 3.4546, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.6797, "num_token_overlap": 14.6368, "num_token_query": 37.3947, "num_token_union": 65.3391, "num_word_context": 202.2133, "num_word_doc": 49.7203, "num_word_query": 27.9932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2801.6658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2046, "query_norm": 1.3617, "queue_k_norm": 1.4088, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3947, "sent_len_1": 66.6797, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2587, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4642, "doc_norm": 1.403, "encoder_q-embeddings": 1035.7695, "encoder_q-layer.0": 720.3843, "encoder_q-layer.1": 733.1196, "encoder_q-layer.10": 1119.5735, "encoder_q-layer.11": 2860.9458, "encoder_q-layer.2": 826.137, "encoder_q-layer.3": 852.3821, "encoder_q-layer.4": 891.4199, "encoder_q-layer.5": 875.8934, "encoder_q-layer.6": 934.6328, "encoder_q-layer.7": 1017.0815, "encoder_q-layer.8": 1243.6168, "encoder_q-layer.9": 1154.833, "epoch": 0.53, "inbatch_neg_score": 0.2139, "inbatch_pos_score": 0.8403, "learning_rate": 2.5222222222222225e-05, "loss": 3.4642, "norm_diff": 0.0432, "norm_loss": 0.0, "num_token_doc": 66.8292, "num_token_overlap": 14.5862, "num_token_query": 37.209, "num_token_union": 65.3058, "num_word_context": 202.6306, "num_word_doc": 49.8846, "num_word_query": 27.8567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1883.7972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.213, "query_norm": 1.3598, "queue_k_norm": 1.4085, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.209, "sent_len_1": 66.8292, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.6775, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.4596, "doc_norm": 1.4129, "encoder_q-embeddings": 1141.4142, "encoder_q-layer.0": 760.9944, "encoder_q-layer.1": 783.0695, "encoder_q-layer.10": 1215.4622, "encoder_q-layer.11": 2905.3801, "encoder_q-layer.2": 878.3328, "encoder_q-layer.3": 916.1505, "encoder_q-layer.4": 1005.1028, "encoder_q-layer.5": 1000.1863, "encoder_q-layer.6": 1030.7286, "encoder_q-layer.7": 1145.4469, "encoder_q-layer.8": 1299.1636, "encoder_q-layer.9": 1184.2177, "epoch": 0.53, "inbatch_neg_score": 0.2142, "inbatch_pos_score": 0.8223, "learning_rate": 2.5166666666666667e-05, "loss": 3.4596, "norm_diff": 0.0703, "norm_loss": 0.0, "num_token_doc": 66.6309, "num_token_overlap": 14.5418, "num_token_query": 37.2715, "num_token_union": 65.2566, "num_word_context": 201.8559, "num_word_doc": 49.6945, "num_word_query": 27.8992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1939.8551, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2147, "query_norm": 1.3426, "queue_k_norm": 1.4096, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2715, "sent_len_1": 66.6309, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.5312, "stdk": 0.0485, "stdq": 0.0439, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.4718, "doc_norm": 1.4151, "encoder_q-embeddings": 3945.4597, "encoder_q-layer.0": 2857.6516, "encoder_q-layer.1": 3131.2036, "encoder_q-layer.10": 2494.6768, "encoder_q-layer.11": 5878.8735, "encoder_q-layer.2": 4263.3779, "encoder_q-layer.3": 4284.438, "encoder_q-layer.4": 4454.8989, "encoder_q-layer.5": 3833.4807, "encoder_q-layer.6": 3861.8157, "encoder_q-layer.7": 3773.1753, "encoder_q-layer.8": 2784.8704, "encoder_q-layer.9": 2267.4109, "epoch": 0.54, "inbatch_neg_score": 0.2246, "inbatch_pos_score": 0.8809, "learning_rate": 2.5111111111111113e-05, "loss": 3.4718, "norm_diff": 0.0523, "norm_loss": 0.0, "num_token_doc": 66.5663, "num_token_overlap": 14.5426, "num_token_query": 37.3281, "num_token_union": 65.3062, "num_word_context": 202.0356, "num_word_doc": 49.7013, "num_word_query": 27.9365, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5748.495, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2246, "query_norm": 1.3628, "queue_k_norm": 1.4116, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3281, "sent_len_1": 66.5663, "sent_len_max_0": 128.0, "sent_len_max_1": 186.49, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4708, "doc_norm": 1.4078, "encoder_q-embeddings": 2408.1145, "encoder_q-layer.0": 1673.6659, "encoder_q-layer.1": 1813.3013, "encoder_q-layer.10": 2086.0276, "encoder_q-layer.11": 5334.1226, "encoder_q-layer.2": 1982.9343, "encoder_q-layer.3": 2087.4294, "encoder_q-layer.4": 2276.0032, "encoder_q-layer.5": 2321.1089, "encoder_q-layer.6": 2438.6091, "encoder_q-layer.7": 2374.3528, "encoder_q-layer.8": 2338.2463, "encoder_q-layer.9": 2045.7737, "epoch": 0.54, "inbatch_neg_score": 0.2198, "inbatch_pos_score": 0.8613, "learning_rate": 2.5055555555555555e-05, "loss": 3.4708, "norm_diff": 0.0698, "norm_loss": 0.0, "num_token_doc": 66.6486, "num_token_overlap": 14.6103, "num_token_query": 37.3015, "num_token_union": 65.295, "num_word_context": 202.1944, "num_word_doc": 49.7419, "num_word_query": 27.9075, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3849.5263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2197, "query_norm": 1.3379, "queue_k_norm": 1.4126, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3015, "sent_len_1": 66.6486, "sent_len_max_0": 128.0, "sent_len_max_1": 190.325, "stdk": 0.0482, "stdq": 0.044, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.4743, "doc_norm": 1.414, "encoder_q-embeddings": 2742.9878, "encoder_q-layer.0": 1939.7925, "encoder_q-layer.1": 1941.5315, "encoder_q-layer.10": 2329.9431, "encoder_q-layer.11": 5884.9438, "encoder_q-layer.2": 2129.2183, "encoder_q-layer.3": 2207.0386, "encoder_q-layer.4": 2277.8831, "encoder_q-layer.5": 2464.8323, "encoder_q-layer.6": 2379.5994, "encoder_q-layer.7": 2520.7839, "encoder_q-layer.8": 2813.8376, "encoder_q-layer.9": 2392.3081, "epoch": 0.54, "inbatch_neg_score": 0.2194, "inbatch_pos_score": 0.8457, "learning_rate": 2.5e-05, "loss": 3.4743, "norm_diff": 0.0686, "norm_loss": 0.0, "num_token_doc": 66.8004, "num_token_overlap": 14.5731, "num_token_query": 37.4302, "num_token_union": 65.4901, "num_word_context": 202.3845, "num_word_doc": 49.8518, "num_word_query": 28.0358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4278.4407, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2185, "query_norm": 1.3455, "queue_k_norm": 1.4138, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4302, "sent_len_1": 66.8004, "sent_len_max_0": 127.985, "sent_len_max_1": 189.985, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4526, "doc_norm": 1.4187, "encoder_q-embeddings": 4297.7207, "encoder_q-layer.0": 2921.3613, "encoder_q-layer.1": 3357.9941, "encoder_q-layer.10": 2414.2966, "encoder_q-layer.11": 6359.4097, "encoder_q-layer.2": 4230.0483, "encoder_q-layer.3": 4301.7231, "encoder_q-layer.4": 4353.7349, "encoder_q-layer.5": 3640.26, "encoder_q-layer.6": 3336.7415, "encoder_q-layer.7": 3132.9443, "encoder_q-layer.8": 2881.6084, "encoder_q-layer.9": 2427.4846, "epoch": 0.54, "inbatch_neg_score": 0.2172, "inbatch_pos_score": 0.8848, "learning_rate": 2.4944444444444447e-05, "loss": 3.4526, "norm_diff": 0.0456, "norm_loss": 0.0, "num_token_doc": 66.6322, "num_token_overlap": 14.6116, "num_token_query": 37.3623, "num_token_union": 65.2895, "num_word_context": 202.3068, "num_word_doc": 49.7113, "num_word_query": 27.9921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5756.3974, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2166, "query_norm": 1.3731, "queue_k_norm": 1.4131, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3623, "sent_len_1": 66.6322, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1875, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.4765, "doc_norm": 1.4113, "encoder_q-embeddings": 3052.5403, "encoder_q-layer.0": 2060.4651, "encoder_q-layer.1": 2230.137, "encoder_q-layer.10": 2255.8608, "encoder_q-layer.11": 6147.4956, "encoder_q-layer.2": 2534.5166, "encoder_q-layer.3": 2678.822, "encoder_q-layer.4": 2739.2046, "encoder_q-layer.5": 2832.1548, "encoder_q-layer.6": 2840.8149, "encoder_q-layer.7": 2786.5886, "encoder_q-layer.8": 2738.2058, "encoder_q-layer.9": 2434.8311, "epoch": 0.54, "inbatch_neg_score": 0.2158, "inbatch_pos_score": 0.833, "learning_rate": 2.488888888888889e-05, "loss": 3.4765, "norm_diff": 0.0906, "norm_loss": 0.0, "num_token_doc": 66.965, "num_token_overlap": 14.6099, "num_token_query": 37.2551, "num_token_union": 65.4114, "num_word_context": 202.3276, "num_word_doc": 49.9941, "num_word_query": 27.8732, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4592.417, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2147, "query_norm": 1.3207, "queue_k_norm": 1.4156, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2551, "sent_len_1": 66.965, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.2862, "stdk": 0.0482, "stdq": 0.0438, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.4602, "doc_norm": 1.4191, "encoder_q-embeddings": 2633.6863, "encoder_q-layer.0": 1811.0835, "encoder_q-layer.1": 1879.0886, "encoder_q-layer.10": 2360.293, "encoder_q-layer.11": 6256.0347, "encoder_q-layer.2": 2236.9727, "encoder_q-layer.3": 2448.96, "encoder_q-layer.4": 2402.5752, "encoder_q-layer.5": 2769.4136, "encoder_q-layer.6": 2568.1191, "encoder_q-layer.7": 2522.573, "encoder_q-layer.8": 2910.79, "encoder_q-layer.9": 2395.7776, "epoch": 0.54, "inbatch_neg_score": 0.2141, "inbatch_pos_score": 0.8354, "learning_rate": 2.4833333333333335e-05, "loss": 3.4602, "norm_diff": 0.1077, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 14.6464, "num_token_query": 37.4492, "num_token_union": 65.4862, "num_word_context": 202.1414, "num_word_doc": 49.8977, "num_word_query": 28.0431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4414.5796, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2139, "query_norm": 1.3113, "queue_k_norm": 1.4169, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4492, "sent_len_1": 66.883, "sent_len_max_0": 128.0, "sent_len_max_1": 189.77, "stdk": 0.0485, "stdq": 0.0435, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4575, "doc_norm": 1.4205, "encoder_q-embeddings": 2747.7722, "encoder_q-layer.0": 1956.1035, "encoder_q-layer.1": 2029.0698, "encoder_q-layer.10": 2376.4905, "encoder_q-layer.11": 5793.7573, "encoder_q-layer.2": 2307.0166, "encoder_q-layer.3": 2354.6062, "encoder_q-layer.4": 2493.7839, "encoder_q-layer.5": 2442.3491, "encoder_q-layer.6": 2465.3618, "encoder_q-layer.7": 2396.9104, "encoder_q-layer.8": 2531.8125, "encoder_q-layer.9": 2306.0542, "epoch": 0.54, "inbatch_neg_score": 0.2158, "inbatch_pos_score": 0.8672, "learning_rate": 2.477777777777778e-05, "loss": 3.4575, "norm_diff": 0.0747, "norm_loss": 0.0, "num_token_doc": 66.8381, "num_token_overlap": 14.6499, "num_token_query": 37.4547, "num_token_union": 65.4078, "num_word_context": 202.2739, "num_word_doc": 49.9146, "num_word_query": 28.0735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4190.9411, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2168, "query_norm": 1.3458, "queue_k_norm": 1.4152, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4547, "sent_len_1": 66.8381, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1138, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.4473, "doc_norm": 1.417, "encoder_q-embeddings": 2416.5144, "encoder_q-layer.0": 1672.3434, "encoder_q-layer.1": 1716.5098, "encoder_q-layer.10": 2277.3457, "encoder_q-layer.11": 5938.6685, "encoder_q-layer.2": 1846.5527, "encoder_q-layer.3": 1842.4121, "encoder_q-layer.4": 1857.527, "encoder_q-layer.5": 1838.7485, "encoder_q-layer.6": 2019.3583, "encoder_q-layer.7": 2310.8467, "encoder_q-layer.8": 2538.9451, "encoder_q-layer.9": 2230.1943, "epoch": 0.54, "inbatch_neg_score": 0.2132, "inbatch_pos_score": 0.856, "learning_rate": 2.4722222222222223e-05, "loss": 3.4473, "norm_diff": 0.0852, "norm_loss": 0.0, "num_token_doc": 66.6738, "num_token_overlap": 14.636, "num_token_query": 37.3847, "num_token_union": 65.3034, "num_word_context": 202.0403, "num_word_doc": 49.768, "num_word_query": 28.0015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4005.8242, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2133, "query_norm": 1.3318, "queue_k_norm": 1.416, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3847, "sent_len_1": 66.6738, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4212, "stdk": 0.0484, "stdq": 0.0444, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.4819, "doc_norm": 1.4182, "encoder_q-embeddings": 4135.6792, "encoder_q-layer.0": 2782.2336, "encoder_q-layer.1": 3027.5481, "encoder_q-layer.10": 2334.9255, "encoder_q-layer.11": 5917.1982, "encoder_q-layer.2": 3685.6497, "encoder_q-layer.3": 3677.5935, "encoder_q-layer.4": 4390.5786, "encoder_q-layer.5": 4203.0928, "encoder_q-layer.6": 3835.9363, "encoder_q-layer.7": 3155.7256, "encoder_q-layer.8": 2697.729, "encoder_q-layer.9": 2197.9966, "epoch": 0.54, "inbatch_neg_score": 0.214, "inbatch_pos_score": 0.8374, "learning_rate": 2.466666666666667e-05, "loss": 3.4819, "norm_diff": 0.101, "norm_loss": 0.0, "num_token_doc": 66.6003, "num_token_overlap": 14.5646, "num_token_query": 37.3166, "num_token_union": 65.2582, "num_word_context": 202.0012, "num_word_doc": 49.693, "num_word_query": 27.9311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5600.1874, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2141, "query_norm": 1.3171, "queue_k_norm": 1.4171, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3166, "sent_len_1": 66.6003, "sent_len_max_0": 127.9663, "sent_len_max_1": 188.6975, "stdk": 0.0484, "stdq": 0.0438, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4752, "doc_norm": 1.4164, "encoder_q-embeddings": 4864.4233, "encoder_q-layer.0": 3305.8699, "encoder_q-layer.1": 3577.4124, "encoder_q-layer.10": 2333.3813, "encoder_q-layer.11": 6117.6738, "encoder_q-layer.2": 4043.1631, "encoder_q-layer.3": 4326.1553, "encoder_q-layer.4": 4286.708, "encoder_q-layer.5": 4079.4878, "encoder_q-layer.6": 3795.4407, "encoder_q-layer.7": 3620.3108, "encoder_q-layer.8": 3045.8325, "encoder_q-layer.9": 2305.6016, "epoch": 0.54, "inbatch_neg_score": 0.2071, "inbatch_pos_score": 0.8413, "learning_rate": 2.461111111111111e-05, "loss": 3.4752, "norm_diff": 0.1052, "norm_loss": 0.0, "num_token_doc": 66.6528, "num_token_overlap": 14.5661, "num_token_query": 37.3671, "num_token_union": 65.3353, "num_word_context": 202.3313, "num_word_doc": 49.7686, "num_word_query": 27.9743, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6038.1446, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2078, "query_norm": 1.3113, "queue_k_norm": 1.4157, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3671, "sent_len_1": 66.6528, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5037, "stdk": 0.0484, "stdq": 0.0437, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4732, "doc_norm": 1.4218, "encoder_q-embeddings": 2060.6816, "encoder_q-layer.0": 1413.9244, "encoder_q-layer.1": 1444.2598, "encoder_q-layer.10": 2201.0664, "encoder_q-layer.11": 5780.3179, "encoder_q-layer.2": 1585.9841, "encoder_q-layer.3": 1671.554, "encoder_q-layer.4": 1669.0415, "encoder_q-layer.5": 1666.3815, "encoder_q-layer.6": 1854.2549, "encoder_q-layer.7": 2012.7506, "encoder_q-layer.8": 2404.533, "encoder_q-layer.9": 2216.4534, "epoch": 0.54, "inbatch_neg_score": 0.2055, "inbatch_pos_score": 0.8555, "learning_rate": 2.4555555555555557e-05, "loss": 3.4732, "norm_diff": 0.0972, "norm_loss": 0.0, "num_token_doc": 66.8277, "num_token_overlap": 14.6365, "num_token_query": 37.2867, "num_token_union": 65.3177, "num_word_context": 202.1621, "num_word_doc": 49.8345, "num_word_query": 27.9, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3662.6255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2053, "query_norm": 1.3246, "queue_k_norm": 1.4185, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2867, "sent_len_1": 66.8277, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.6062, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.4574, "doc_norm": 1.42, "encoder_q-embeddings": 4592.5312, "encoder_q-layer.0": 3364.3665, "encoder_q-layer.1": 3774.7092, "encoder_q-layer.10": 2550.4055, "encoder_q-layer.11": 5925.5747, "encoder_q-layer.2": 4483.6646, "encoder_q-layer.3": 4340.771, "encoder_q-layer.4": 4056.0637, "encoder_q-layer.5": 4400.293, "encoder_q-layer.6": 4063.1062, "encoder_q-layer.7": 4262.4116, "encoder_q-layer.8": 2980.1375, "encoder_q-layer.9": 2316.0071, "epoch": 0.55, "inbatch_neg_score": 0.2092, "inbatch_pos_score": 0.834, "learning_rate": 2.45e-05, "loss": 3.4574, "norm_diff": 0.0985, "norm_loss": 0.0, "num_token_doc": 66.8349, "num_token_overlap": 14.5707, "num_token_query": 37.2662, "num_token_union": 65.3668, "num_word_context": 202.1586, "num_word_doc": 49.8251, "num_word_query": 27.9087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6183.4007, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2081, "query_norm": 1.3215, "queue_k_norm": 1.4169, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2662, "sent_len_1": 66.8349, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.2713, "stdk": 0.0485, "stdq": 0.044, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.478, "doc_norm": 1.4121, "encoder_q-embeddings": 3084.2437, "encoder_q-layer.0": 2144.9888, "encoder_q-layer.1": 2401.6243, "encoder_q-layer.10": 2240.4746, "encoder_q-layer.11": 5852.2148, "encoder_q-layer.2": 2769.8516, "encoder_q-layer.3": 2806.9524, "encoder_q-layer.4": 2855.6675, "encoder_q-layer.5": 3170.991, "encoder_q-layer.6": 2947.8398, "encoder_q-layer.7": 2833.8193, "encoder_q-layer.8": 2901.3167, "encoder_q-layer.9": 2247.8638, "epoch": 0.55, "inbatch_neg_score": 0.2077, "inbatch_pos_score": 0.8359, "learning_rate": 2.4444444444444445e-05, "loss": 3.478, "norm_diff": 0.0893, "norm_loss": 0.0, "num_token_doc": 66.7676, "num_token_overlap": 14.5589, "num_token_query": 37.2194, "num_token_union": 65.3219, "num_word_context": 202.4672, "num_word_doc": 49.7553, "num_word_query": 27.8296, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4639.2068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.208, "query_norm": 1.3228, "queue_k_norm": 1.4171, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2194, "sent_len_1": 66.7676, "sent_len_max_0": 128.0, "sent_len_max_1": 190.15, "stdk": 0.0482, "stdq": 0.0441, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.489, "doc_norm": 1.4205, "encoder_q-embeddings": 4055.5344, "encoder_q-layer.0": 2855.4919, "encoder_q-layer.1": 3170.7744, "encoder_q-layer.10": 2394.6692, "encoder_q-layer.11": 6111.021, "encoder_q-layer.2": 3687.8191, "encoder_q-layer.3": 4035.1748, "encoder_q-layer.4": 4109.4028, "encoder_q-layer.5": 3786.8364, "encoder_q-layer.6": 3511.5181, "encoder_q-layer.7": 3224.7708, "encoder_q-layer.8": 2599.5557, "encoder_q-layer.9": 2256.7244, "epoch": 0.55, "inbatch_neg_score": 0.2084, "inbatch_pos_score": 0.8511, "learning_rate": 2.4388888888888887e-05, "loss": 3.489, "norm_diff": 0.0907, "norm_loss": 0.0, "num_token_doc": 66.8038, "num_token_overlap": 14.5041, "num_token_query": 37.1572, "num_token_union": 65.3942, "num_word_context": 202.1854, "num_word_doc": 49.8778, "num_word_query": 27.807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5512.0316, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2085, "query_norm": 1.3299, "queue_k_norm": 1.4163, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1572, "sent_len_1": 66.8038, "sent_len_max_0": 127.99, "sent_len_max_1": 189.3212, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.4592, "doc_norm": 1.4121, "encoder_q-embeddings": 3891.3137, "encoder_q-layer.0": 2668.7222, "encoder_q-layer.1": 3048.9048, "encoder_q-layer.10": 2301.0957, "encoder_q-layer.11": 5903.833, "encoder_q-layer.2": 3481.5986, "encoder_q-layer.3": 3763.1262, "encoder_q-layer.4": 3808.6494, "encoder_q-layer.5": 4357.3608, "encoder_q-layer.6": 3714.9001, "encoder_q-layer.7": 3258.665, "encoder_q-layer.8": 3046.4348, "encoder_q-layer.9": 2372.5977, "epoch": 0.55, "inbatch_neg_score": 0.2087, "inbatch_pos_score": 0.8174, "learning_rate": 2.4333333333333336e-05, "loss": 3.4592, "norm_diff": 0.1143, "norm_loss": 0.0, "num_token_doc": 66.8728, "num_token_overlap": 14.5703, "num_token_query": 37.2023, "num_token_union": 65.3819, "num_word_context": 202.6724, "num_word_doc": 49.9327, "num_word_query": 27.8611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5435.5211, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2086, "query_norm": 1.2978, "queue_k_norm": 1.4165, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2023, "sent_len_1": 66.8728, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.4725, "stdk": 0.0482, "stdq": 0.0431, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.4467, "doc_norm": 1.4086, "encoder_q-embeddings": 2427.946, "encoder_q-layer.0": 1673.663, "encoder_q-layer.1": 1960.3163, "encoder_q-layer.10": 2254.061, "encoder_q-layer.11": 5432.2778, "encoder_q-layer.2": 2106.3242, "encoder_q-layer.3": 2154.5645, "encoder_q-layer.4": 2325.812, "encoder_q-layer.5": 2234.5984, "encoder_q-layer.6": 2246.3313, "encoder_q-layer.7": 2292.1082, "encoder_q-layer.8": 2508.0203, "encoder_q-layer.9": 2101.415, "epoch": 0.55, "inbatch_neg_score": 0.2098, "inbatch_pos_score": 0.853, "learning_rate": 2.427777777777778e-05, "loss": 3.4467, "norm_diff": 0.0858, "norm_loss": 0.0, "num_token_doc": 66.8103, "num_token_overlap": 14.6396, "num_token_query": 37.5154, "num_token_union": 65.4458, "num_word_context": 202.5888, "num_word_doc": 49.8573, "num_word_query": 28.0637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3920.8644, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2089, "query_norm": 1.3228, "queue_k_norm": 1.4187, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5154, "sent_len_1": 66.8103, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.8313, "stdk": 0.0481, "stdq": 0.044, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.4449, "doc_norm": 1.4217, "encoder_q-embeddings": 2987.7495, "encoder_q-layer.0": 2195.6465, "encoder_q-layer.1": 2434.5281, "encoder_q-layer.10": 2450.8542, "encoder_q-layer.11": 5581.1816, "encoder_q-layer.2": 2859.998, "encoder_q-layer.3": 3054.074, "encoder_q-layer.4": 2992.6675, "encoder_q-layer.5": 3174.1626, "encoder_q-layer.6": 3236.5542, "encoder_q-layer.7": 3369.4778, "encoder_q-layer.8": 2692.1814, "encoder_q-layer.9": 2207.3137, "epoch": 0.55, "inbatch_neg_score": 0.2055, "inbatch_pos_score": 0.8677, "learning_rate": 2.4222222222222224e-05, "loss": 3.4449, "norm_diff": 0.0854, "norm_loss": 0.0, "num_token_doc": 67.0081, "num_token_overlap": 14.6361, "num_token_query": 37.6346, "num_token_union": 65.6577, "num_word_context": 202.6207, "num_word_doc": 49.977, "num_word_query": 28.1902, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4696.8811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2064, "query_norm": 1.3362, "queue_k_norm": 1.4166, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.6346, "sent_len_1": 67.0081, "sent_len_max_0": 128.0, "sent_len_max_1": 189.375, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.465, "doc_norm": 1.4179, "encoder_q-embeddings": 2944.0503, "encoder_q-layer.0": 2056.7129, "encoder_q-layer.1": 2309.0962, "encoder_q-layer.10": 2273.459, "encoder_q-layer.11": 5577.3013, "encoder_q-layer.2": 2873.6121, "encoder_q-layer.3": 3119.292, "encoder_q-layer.4": 3031.4609, "encoder_q-layer.5": 3013.71, "encoder_q-layer.6": 2456.7954, "encoder_q-layer.7": 2295.2976, "encoder_q-layer.8": 2483.2542, "encoder_q-layer.9": 2189.8818, "epoch": 0.55, "inbatch_neg_score": 0.2069, "inbatch_pos_score": 0.8628, "learning_rate": 2.4166666666666667e-05, "loss": 3.465, "norm_diff": 0.0908, "norm_loss": 0.0, "num_token_doc": 66.7709, "num_token_overlap": 14.5712, "num_token_query": 37.2973, "num_token_union": 65.3702, "num_word_context": 202.2096, "num_word_doc": 49.8048, "num_word_query": 27.919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4448.6519, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2069, "query_norm": 1.3271, "queue_k_norm": 1.418, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2973, "sent_len_1": 66.7709, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4187, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.4371, "doc_norm": 1.417, "encoder_q-embeddings": 10207.9551, "encoder_q-layer.0": 6500.0205, "encoder_q-layer.1": 7886.7539, "encoder_q-layer.10": 2290.031, "encoder_q-layer.11": 5561.1094, "encoder_q-layer.2": 8794.0869, "encoder_q-layer.3": 9190.7031, "encoder_q-layer.4": 9074.4717, "encoder_q-layer.5": 8760.8906, "encoder_q-layer.6": 7221.7939, "encoder_q-layer.7": 6419.7842, "encoder_q-layer.8": 3530.0466, "encoder_q-layer.9": 2312.1414, "epoch": 0.55, "inbatch_neg_score": 0.2068, "inbatch_pos_score": 0.8789, "learning_rate": 2.4111111111111113e-05, "loss": 3.4371, "norm_diff": 0.0833, "norm_loss": 0.0, "num_token_doc": 66.9371, "num_token_overlap": 14.652, "num_token_query": 37.517, "num_token_union": 65.5772, "num_word_context": 202.886, "num_word_doc": 49.9673, "num_word_query": 28.1207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11112.9113, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2074, "query_norm": 1.3337, "queue_k_norm": 1.4182, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.517, "sent_len_1": 66.9371, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4688, "stdk": 0.0484, "stdq": 0.0444, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.4472, "doc_norm": 1.4173, "encoder_q-embeddings": 1128.0613, "encoder_q-layer.0": 785.5087, "encoder_q-layer.1": 855.6205, "encoder_q-layer.10": 1091.3433, "encoder_q-layer.11": 2849.5574, "encoder_q-layer.2": 1035.0518, "encoder_q-layer.3": 1031.1653, "encoder_q-layer.4": 1037.5455, "encoder_q-layer.5": 986.2778, "encoder_q-layer.6": 1037.0973, "encoder_q-layer.7": 1122.0699, "encoder_q-layer.8": 1296.9523, "encoder_q-layer.9": 1195.9313, "epoch": 0.55, "inbatch_neg_score": 0.2069, "inbatch_pos_score": 0.8208, "learning_rate": 2.4055555555555555e-05, "loss": 3.4472, "norm_diff": 0.1056, "norm_loss": 0.0, "num_token_doc": 66.8376, "num_token_overlap": 14.6696, "num_token_query": 37.4713, "num_token_union": 65.4148, "num_word_context": 202.2727, "num_word_doc": 49.85, "num_word_query": 28.0548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1970.6867, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2052, "query_norm": 1.3117, "queue_k_norm": 1.4176, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4713, "sent_len_1": 66.8376, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5675, "stdk": 0.0484, "stdq": 0.0435, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4422, "doc_norm": 1.4172, "encoder_q-embeddings": 7324.6621, "encoder_q-layer.0": 4930.7529, "encoder_q-layer.1": 5525.4458, "encoder_q-layer.10": 1226.5951, "encoder_q-layer.11": 3152.6838, "encoder_q-layer.2": 5053.9424, "encoder_q-layer.3": 4703.9463, "encoder_q-layer.4": 3921.6733, "encoder_q-layer.5": 3062.7437, "encoder_q-layer.6": 3073.0835, "encoder_q-layer.7": 2737.0576, "encoder_q-layer.8": 1769.0952, "encoder_q-layer.9": 1278.2971, "epoch": 0.55, "inbatch_neg_score": 0.2099, "inbatch_pos_score": 0.8438, "learning_rate": 2.4e-05, "loss": 3.4422, "norm_diff": 0.0685, "norm_loss": 0.0, "num_token_doc": 66.8143, "num_token_overlap": 14.6747, "num_token_query": 37.4642, "num_token_union": 65.4188, "num_word_context": 202.4493, "num_word_doc": 49.9231, "num_word_query": 28.0664, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6470.6039, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2091, "query_norm": 1.3487, "queue_k_norm": 1.4158, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4642, "sent_len_1": 66.8143, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.265, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.4489, "doc_norm": 1.4159, "encoder_q-embeddings": 1338.5826, "encoder_q-layer.0": 890.3086, "encoder_q-layer.1": 977.6282, "encoder_q-layer.10": 1156.2017, "encoder_q-layer.11": 2975.082, "encoder_q-layer.2": 1107.3761, "encoder_q-layer.3": 1155.7886, "encoder_q-layer.4": 1213.0308, "encoder_q-layer.5": 1172.629, "encoder_q-layer.6": 1195.9015, "encoder_q-layer.7": 1306.8916, "encoder_q-layer.8": 1389.8527, "encoder_q-layer.9": 1269.7007, "epoch": 0.56, "inbatch_neg_score": 0.205, "inbatch_pos_score": 0.8325, "learning_rate": 2.3944444444444443e-05, "loss": 3.4489, "norm_diff": 0.0839, "norm_loss": 0.0, "num_token_doc": 66.7635, "num_token_overlap": 14.6322, "num_token_query": 37.5546, "num_token_union": 65.492, "num_word_context": 202.4247, "num_word_doc": 49.7783, "num_word_query": 28.1321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2131.1816, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2048, "query_norm": 1.332, "queue_k_norm": 1.4188, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5546, "sent_len_1": 66.7635, "sent_len_max_0": 128.0, "sent_len_max_1": 191.64, "stdk": 0.0484, "stdq": 0.0443, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4739, "doc_norm": 1.4184, "encoder_q-embeddings": 1298.2699, "encoder_q-layer.0": 876.448, "encoder_q-layer.1": 1012.1019, "encoder_q-layer.10": 1171.4266, "encoder_q-layer.11": 2865.6592, "encoder_q-layer.2": 1138.3855, "encoder_q-layer.3": 1176.5236, "encoder_q-layer.4": 1309.9156, "encoder_q-layer.5": 1251.1549, "encoder_q-layer.6": 1236.4081, "encoder_q-layer.7": 1383.7181, "encoder_q-layer.8": 1315.6003, "encoder_q-layer.9": 1136.2466, "epoch": 0.56, "inbatch_neg_score": 0.2038, "inbatch_pos_score": 0.8354, "learning_rate": 2.3888888888888892e-05, "loss": 3.4739, "norm_diff": 0.1038, "norm_loss": 0.0, "num_token_doc": 66.7177, "num_token_overlap": 14.5123, "num_token_query": 37.3061, "num_token_union": 65.3629, "num_word_context": 202.6424, "num_word_doc": 49.7655, "num_word_query": 27.9184, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2104.123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2037, "query_norm": 1.3146, "queue_k_norm": 1.4199, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3061, "sent_len_1": 66.7177, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0563, "stdk": 0.0484, "stdq": 0.0435, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.4467, "doc_norm": 1.4125, "encoder_q-embeddings": 3877.0935, "encoder_q-layer.0": 2823.1743, "encoder_q-layer.1": 3275.6096, "encoder_q-layer.10": 1091.2065, "encoder_q-layer.11": 2921.9529, "encoder_q-layer.2": 4135.8481, "encoder_q-layer.3": 4040.5854, "encoder_q-layer.4": 4168.6255, "encoder_q-layer.5": 3528.0547, "encoder_q-layer.6": 3140.3391, "encoder_q-layer.7": 2092.4065, "encoder_q-layer.8": 1494.8511, "encoder_q-layer.9": 1137.1438, "epoch": 0.56, "inbatch_neg_score": 0.2027, "inbatch_pos_score": 0.8301, "learning_rate": 2.3833333333333334e-05, "loss": 3.4467, "norm_diff": 0.0841, "norm_loss": 0.0, "num_token_doc": 66.9613, "num_token_overlap": 14.5498, "num_token_query": 37.2009, "num_token_union": 65.4479, "num_word_context": 202.4032, "num_word_doc": 49.9719, "num_word_query": 27.8398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4663.7872, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2024, "query_norm": 1.3284, "queue_k_norm": 1.4179, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2009, "sent_len_1": 66.9613, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.3413, "stdk": 0.0482, "stdq": 0.0441, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.4603, "doc_norm": 1.4114, "encoder_q-embeddings": 1052.0725, "encoder_q-layer.0": 722.7341, "encoder_q-layer.1": 763.5049, "encoder_q-layer.10": 1121.2843, "encoder_q-layer.11": 2859.0637, "encoder_q-layer.2": 875.0225, "encoder_q-layer.3": 895.0757, "encoder_q-layer.4": 900.1425, "encoder_q-layer.5": 893.4419, "encoder_q-layer.6": 1034.4137, "encoder_q-layer.7": 1187.1714, "encoder_q-layer.8": 1374.959, "encoder_q-layer.9": 1108.4557, "epoch": 0.56, "inbatch_neg_score": 0.2069, "inbatch_pos_score": 0.8369, "learning_rate": 2.377777777777778e-05, "loss": 3.4603, "norm_diff": 0.0835, "norm_loss": 0.0, "num_token_doc": 66.6778, "num_token_overlap": 14.5829, "num_token_query": 37.3435, "num_token_union": 65.3231, "num_word_context": 202.1814, "num_word_doc": 49.7887, "num_word_query": 27.9719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1927.5622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2068, "query_norm": 1.3279, "queue_k_norm": 1.4177, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3435, "sent_len_1": 66.6778, "sent_len_max_0": 127.995, "sent_len_max_1": 189.23, "stdk": 0.0482, "stdq": 0.0439, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4534, "doc_norm": 1.4223, "encoder_q-embeddings": 1316.9438, "encoder_q-layer.0": 890.3695, "encoder_q-layer.1": 959.7316, "encoder_q-layer.10": 1253.8528, "encoder_q-layer.11": 3028.1479, "encoder_q-layer.2": 1109.1758, "encoder_q-layer.3": 1162.4119, "encoder_q-layer.4": 1221.9099, "encoder_q-layer.5": 1162.7134, "encoder_q-layer.6": 1150.8367, "encoder_q-layer.7": 1195.5322, "encoder_q-layer.8": 1311.1263, "encoder_q-layer.9": 1158.0057, "epoch": 0.56, "inbatch_neg_score": 0.2102, "inbatch_pos_score": 0.8496, "learning_rate": 2.3722222222222222e-05, "loss": 3.4534, "norm_diff": 0.0764, "norm_loss": 0.0, "num_token_doc": 66.7509, "num_token_overlap": 14.6529, "num_token_query": 37.4668, "num_token_union": 65.3882, "num_word_context": 202.5056, "num_word_doc": 49.8343, "num_word_query": 28.0645, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2140.8918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2091, "query_norm": 1.3459, "queue_k_norm": 1.4178, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4668, "sent_len_1": 66.7509, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0425, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4414, "doc_norm": 1.4111, "encoder_q-embeddings": 1375.9254, "encoder_q-layer.0": 899.9513, "encoder_q-layer.1": 952.5343, "encoder_q-layer.10": 1187.474, "encoder_q-layer.11": 2896.0874, "encoder_q-layer.2": 1117.3528, "encoder_q-layer.3": 1181.0791, "encoder_q-layer.4": 1176.4644, "encoder_q-layer.5": 1223.004, "encoder_q-layer.6": 1317.8329, "encoder_q-layer.7": 1254.9348, "encoder_q-layer.8": 1317.6277, "encoder_q-layer.9": 1118.6196, "epoch": 0.56, "inbatch_neg_score": 0.2139, "inbatch_pos_score": 0.8633, "learning_rate": 2.3666666666666668e-05, "loss": 3.4414, "norm_diff": 0.0454, "norm_loss": 0.0, "num_token_doc": 66.793, "num_token_overlap": 14.5581, "num_token_query": 37.1858, "num_token_union": 65.3448, "num_word_context": 201.9791, "num_word_doc": 49.83, "num_word_query": 27.8344, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2146.2635, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2129, "query_norm": 1.3657, "queue_k_norm": 1.4192, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1858, "sent_len_1": 66.793, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4888, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.4384, "doc_norm": 1.4131, "encoder_q-embeddings": 1791.3761, "encoder_q-layer.0": 1245.0956, "encoder_q-layer.1": 1348.093, "encoder_q-layer.10": 1171.1171, "encoder_q-layer.11": 2859.0417, "encoder_q-layer.2": 1483.7933, "encoder_q-layer.3": 1559.4584, "encoder_q-layer.4": 1623.9777, "encoder_q-layer.5": 1597.5042, "encoder_q-layer.6": 1627.7981, "encoder_q-layer.7": 1665.4022, "encoder_q-layer.8": 1356.7131, "encoder_q-layer.9": 1126.4799, "epoch": 0.56, "inbatch_neg_score": 0.2185, "inbatch_pos_score": 0.8374, "learning_rate": 2.361111111111111e-05, "loss": 3.4384, "norm_diff": 0.0761, "norm_loss": 0.0, "num_token_doc": 66.819, "num_token_overlap": 14.6993, "num_token_query": 37.5201, "num_token_union": 65.3933, "num_word_context": 202.1438, "num_word_doc": 49.8412, "num_word_query": 28.1273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2459.5196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2181, "query_norm": 1.337, "queue_k_norm": 1.4179, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5201, "sent_len_1": 66.819, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4825, "stdk": 0.0482, "stdq": 0.0435, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.4595, "doc_norm": 1.4186, "encoder_q-embeddings": 2081.0317, "encoder_q-layer.0": 1485.3966, "encoder_q-layer.1": 1536.1426, "encoder_q-layer.10": 1126.7676, "encoder_q-layer.11": 3024.6689, "encoder_q-layer.2": 1847.7137, "encoder_q-layer.3": 1809.116, "encoder_q-layer.4": 1807.3212, "encoder_q-layer.5": 1746.4712, "encoder_q-layer.6": 1551.0741, "encoder_q-layer.7": 1381.3203, "encoder_q-layer.8": 1369.9912, "encoder_q-layer.9": 1159.5878, "epoch": 0.56, "inbatch_neg_score": 0.221, "inbatch_pos_score": 0.8452, "learning_rate": 2.3555555555555556e-05, "loss": 3.4595, "norm_diff": 0.0686, "norm_loss": 0.0, "num_token_doc": 66.8493, "num_token_overlap": 14.6, "num_token_query": 37.3321, "num_token_union": 65.4335, "num_word_context": 202.4975, "num_word_doc": 49.8992, "num_word_query": 27.9777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2680.5776, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2185, "query_norm": 1.35, "queue_k_norm": 1.4197, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3321, "sent_len_1": 66.8493, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.1413, "stdk": 0.0484, "stdq": 0.0437, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.4437, "doc_norm": 1.4174, "encoder_q-embeddings": 1282.9222, "encoder_q-layer.0": 908.0864, "encoder_q-layer.1": 935.1898, "encoder_q-layer.10": 1193.6429, "encoder_q-layer.11": 3017.7991, "encoder_q-layer.2": 1115.8469, "encoder_q-layer.3": 1139.2406, "encoder_q-layer.4": 1283.3291, "encoder_q-layer.5": 1294.1064, "encoder_q-layer.6": 1341.8284, "encoder_q-layer.7": 1343.373, "encoder_q-layer.8": 1426.412, "encoder_q-layer.9": 1144.5997, "epoch": 0.56, "inbatch_neg_score": 0.2225, "inbatch_pos_score": 0.8398, "learning_rate": 2.35e-05, "loss": 3.4437, "norm_diff": 0.067, "norm_loss": 0.0, "num_token_doc": 66.8184, "num_token_overlap": 14.5966, "num_token_query": 37.3785, "num_token_union": 65.4274, "num_word_context": 202.212, "num_word_doc": 49.862, "num_word_query": 27.9719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2159.4625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2229, "query_norm": 1.3504, "queue_k_norm": 1.4198, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3785, "sent_len_1": 66.8184, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6062, "stdk": 0.0484, "stdq": 0.0438, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.4548, "doc_norm": 1.4201, "encoder_q-embeddings": 1321.0485, "encoder_q-layer.0": 893.4274, "encoder_q-layer.1": 956.9482, "encoder_q-layer.10": 1214.2009, "encoder_q-layer.11": 2979.1414, "encoder_q-layer.2": 1177.135, "encoder_q-layer.3": 1210.3381, "encoder_q-layer.4": 1227.1959, "encoder_q-layer.5": 1289.6903, "encoder_q-layer.6": 1264.0195, "encoder_q-layer.7": 1239.5781, "encoder_q-layer.8": 1416.7531, "encoder_q-layer.9": 1212.1157, "epoch": 0.56, "inbatch_neg_score": 0.2228, "inbatch_pos_score": 0.8564, "learning_rate": 2.3444444444444448e-05, "loss": 3.4548, "norm_diff": 0.0573, "norm_loss": 0.0, "num_token_doc": 66.7313, "num_token_overlap": 14.6531, "num_token_query": 37.4926, "num_token_union": 65.4178, "num_word_context": 202.232, "num_word_doc": 49.7608, "num_word_query": 28.0735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2145.5292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2229, "query_norm": 1.3628, "queue_k_norm": 1.4203, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4926, "sent_len_1": 66.7313, "sent_len_max_0": 127.99, "sent_len_max_1": 190.7138, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.448, "doc_norm": 1.4239, "encoder_q-embeddings": 1117.7881, "encoder_q-layer.0": 753.6338, "encoder_q-layer.1": 800.3502, "encoder_q-layer.10": 1051.4979, "encoder_q-layer.11": 2779.0977, "encoder_q-layer.2": 942.9958, "encoder_q-layer.3": 978.2241, "encoder_q-layer.4": 1025.2722, "encoder_q-layer.5": 1057.5614, "encoder_q-layer.6": 1087.5862, "encoder_q-layer.7": 1153.8384, "encoder_q-layer.8": 1223.7438, "encoder_q-layer.9": 1102.9586, "epoch": 0.57, "inbatch_neg_score": 0.2335, "inbatch_pos_score": 0.9033, "learning_rate": 2.338888888888889e-05, "loss": 3.448, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.7857, "num_token_overlap": 14.6073, "num_token_query": 37.3571, "num_token_union": 65.3979, "num_word_context": 202.5268, "num_word_doc": 49.8797, "num_word_query": 27.9737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1885.3713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2344, "query_norm": 1.376, "queue_k_norm": 1.4209, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3571, "sent_len_1": 66.7857, "sent_len_max_0": 128.0, "sent_len_max_1": 187.2525, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4409, "doc_norm": 1.4218, "encoder_q-embeddings": 1117.3167, "encoder_q-layer.0": 751.1254, "encoder_q-layer.1": 777.0156, "encoder_q-layer.10": 1076.9794, "encoder_q-layer.11": 2709.7637, "encoder_q-layer.2": 837.3056, "encoder_q-layer.3": 853.8104, "encoder_q-layer.4": 898.2332, "encoder_q-layer.5": 884.1603, "encoder_q-layer.6": 959.2055, "encoder_q-layer.7": 1074.4629, "encoder_q-layer.8": 1256.7522, "encoder_q-layer.9": 1113.6917, "epoch": 0.57, "inbatch_neg_score": 0.2354, "inbatch_pos_score": 0.8657, "learning_rate": 2.3333333333333336e-05, "loss": 3.4409, "norm_diff": 0.0613, "norm_loss": 0.0, "num_token_doc": 66.6462, "num_token_overlap": 14.5544, "num_token_query": 37.3586, "num_token_union": 65.3326, "num_word_context": 202.2756, "num_word_doc": 49.6909, "num_word_query": 27.9618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1816.0823, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2351, "query_norm": 1.3604, "queue_k_norm": 1.4206, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3586, "sent_len_1": 66.6462, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7637, "stdk": 0.0485, "stdq": 0.0441, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.44, "doc_norm": 1.4277, "encoder_q-embeddings": 1235.5894, "encoder_q-layer.0": 917.684, "encoder_q-layer.1": 936.8865, "encoder_q-layer.10": 1114.536, "encoder_q-layer.11": 2821.2217, "encoder_q-layer.2": 1090.0631, "encoder_q-layer.3": 1130.0228, "encoder_q-layer.4": 1246.6184, "encoder_q-layer.5": 1130.9138, "encoder_q-layer.6": 1148.3987, "encoder_q-layer.7": 1192.3782, "encoder_q-layer.8": 1254.9012, "encoder_q-layer.9": 1101.2195, "epoch": 0.57, "inbatch_neg_score": 0.2386, "inbatch_pos_score": 0.8823, "learning_rate": 2.3277777777777778e-05, "loss": 3.44, "norm_diff": 0.0813, "norm_loss": 0.0, "num_token_doc": 66.705, "num_token_overlap": 14.6277, "num_token_query": 37.3645, "num_token_union": 65.2961, "num_word_context": 202.3913, "num_word_doc": 49.7802, "num_word_query": 27.9689, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2020.4988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2377, "query_norm": 1.3465, "queue_k_norm": 1.4223, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3645, "sent_len_1": 66.705, "sent_len_max_0": 128.0, "sent_len_max_1": 189.33, "stdk": 0.0487, "stdq": 0.0438, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.4645, "doc_norm": 1.4211, "encoder_q-embeddings": 1067.171, "encoder_q-layer.0": 740.9438, "encoder_q-layer.1": 753.1913, "encoder_q-layer.10": 1117.7576, "encoder_q-layer.11": 3086.4033, "encoder_q-layer.2": 881.6354, "encoder_q-layer.3": 908.6218, "encoder_q-layer.4": 996.4397, "encoder_q-layer.5": 956.7164, "encoder_q-layer.6": 1021.7866, "encoder_q-layer.7": 1023.4247, "encoder_q-layer.8": 1204.6876, "encoder_q-layer.9": 1055.663, "epoch": 0.57, "inbatch_neg_score": 0.2349, "inbatch_pos_score": 0.8613, "learning_rate": 2.3222222222222224e-05, "loss": 3.4645, "norm_diff": 0.0801, "norm_loss": 0.0, "num_token_doc": 66.5337, "num_token_overlap": 14.5227, "num_token_query": 37.307, "num_token_union": 65.2906, "num_word_context": 201.6953, "num_word_doc": 49.6463, "num_word_query": 27.922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1884.8675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2341, "query_norm": 1.341, "queue_k_norm": 1.4224, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.307, "sent_len_1": 66.5337, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1025, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.4364, "doc_norm": 1.4278, "encoder_q-embeddings": 2506.1208, "encoder_q-layer.0": 1675.3678, "encoder_q-layer.1": 1944.7227, "encoder_q-layer.10": 1132.9869, "encoder_q-layer.11": 2901.144, "encoder_q-layer.2": 2207.2515, "encoder_q-layer.3": 2211.292, "encoder_q-layer.4": 2301.5146, "encoder_q-layer.5": 2258.498, "encoder_q-layer.6": 1998.2395, "encoder_q-layer.7": 2034.6766, "encoder_q-layer.8": 1703.1483, "encoder_q-layer.9": 1118.0747, "epoch": 0.57, "inbatch_neg_score": 0.2399, "inbatch_pos_score": 0.8848, "learning_rate": 2.3166666666666666e-05, "loss": 3.4364, "norm_diff": 0.0803, "norm_loss": 0.0, "num_token_doc": 66.7018, "num_token_overlap": 14.5489, "num_token_query": 37.2942, "num_token_union": 65.3548, "num_word_context": 202.2612, "num_word_doc": 49.7386, "num_word_query": 27.9139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3143.4694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2402, "query_norm": 1.3475, "queue_k_norm": 1.4227, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2942, "sent_len_1": 66.7018, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5938, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.4397, "doc_norm": 1.4234, "encoder_q-embeddings": 1518.668, "encoder_q-layer.0": 1078.1908, "encoder_q-layer.1": 1154.2212, "encoder_q-layer.10": 1190.932, "encoder_q-layer.11": 2923.4456, "encoder_q-layer.2": 1298.6866, "encoder_q-layer.3": 1428.3384, "encoder_q-layer.4": 1451.5669, "encoder_q-layer.5": 1593.0735, "encoder_q-layer.6": 1652.821, "encoder_q-layer.7": 1501.2722, "encoder_q-layer.8": 1444.5533, "encoder_q-layer.9": 1187.142, "epoch": 0.57, "inbatch_neg_score": 0.2399, "inbatch_pos_score": 0.8823, "learning_rate": 2.3111111111111112e-05, "loss": 3.4397, "norm_diff": 0.0758, "norm_loss": 0.0, "num_token_doc": 66.7805, "num_token_overlap": 14.6262, "num_token_query": 37.3419, "num_token_union": 65.3685, "num_word_context": 201.9387, "num_word_doc": 49.878, "num_word_query": 27.9648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2390.6991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.24, "query_norm": 1.3477, "queue_k_norm": 1.4244, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3419, "sent_len_1": 66.7805, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6825, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.4459, "doc_norm": 1.4208, "encoder_q-embeddings": 1080.0488, "encoder_q-layer.0": 722.7326, "encoder_q-layer.1": 750.5344, "encoder_q-layer.10": 1185.338, "encoder_q-layer.11": 3112.4243, "encoder_q-layer.2": 838.4286, "encoder_q-layer.3": 865.9668, "encoder_q-layer.4": 916.5347, "encoder_q-layer.5": 986.7312, "encoder_q-layer.6": 1073.4469, "encoder_q-layer.7": 1176.9286, "encoder_q-layer.8": 1401.5739, "encoder_q-layer.9": 1180.9795, "epoch": 0.57, "inbatch_neg_score": 0.2388, "inbatch_pos_score": 0.8325, "learning_rate": 2.3055555555555558e-05, "loss": 3.4459, "norm_diff": 0.104, "norm_loss": 0.0, "num_token_doc": 66.4513, "num_token_overlap": 14.5923, "num_token_query": 37.4311, "num_token_union": 65.2696, "num_word_context": 201.9947, "num_word_doc": 49.6245, "num_word_query": 28.0302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2048.4859, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2373, "query_norm": 1.3168, "queue_k_norm": 1.4251, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4311, "sent_len_1": 66.4513, "sent_len_max_0": 127.995, "sent_len_max_1": 187.1163, "stdk": 0.0483, "stdq": 0.0433, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.4605, "doc_norm": 1.4281, "encoder_q-embeddings": 2328.7458, "encoder_q-layer.0": 1580.9535, "encoder_q-layer.1": 1949.243, "encoder_q-layer.10": 1154.0847, "encoder_q-layer.11": 3022.1543, "encoder_q-layer.2": 2365.3809, "encoder_q-layer.3": 2426.7236, "encoder_q-layer.4": 2277.0789, "encoder_q-layer.5": 2200.8215, "encoder_q-layer.6": 2264.4133, "encoder_q-layer.7": 2231.7214, "encoder_q-layer.8": 2034.5667, "encoder_q-layer.9": 1261.0989, "epoch": 0.57, "inbatch_neg_score": 0.2339, "inbatch_pos_score": 0.8584, "learning_rate": 2.3000000000000003e-05, "loss": 3.4605, "norm_diff": 0.0971, "norm_loss": 0.0, "num_token_doc": 66.9326, "num_token_overlap": 14.636, "num_token_query": 37.5029, "num_token_union": 65.5278, "num_word_context": 202.4258, "num_word_doc": 49.9692, "num_word_query": 28.1017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3247.8354, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2332, "query_norm": 1.331, "queue_k_norm": 1.4275, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5029, "sent_len_1": 66.9326, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4725, "stdk": 0.0485, "stdq": 0.0442, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.4629, "doc_norm": 1.4264, "encoder_q-embeddings": 3623.4429, "encoder_q-layer.0": 2618.8018, "encoder_q-layer.1": 2653.4634, "encoder_q-layer.10": 2298.8901, "encoder_q-layer.11": 5963.0928, "encoder_q-layer.2": 2714.4829, "encoder_q-layer.3": 2841.7214, "encoder_q-layer.4": 2859.9731, "encoder_q-layer.5": 2868.7744, "encoder_q-layer.6": 2849.0913, "encoder_q-layer.7": 2949.0591, "encoder_q-layer.8": 2737.9558, "encoder_q-layer.9": 2326.3132, "epoch": 0.57, "inbatch_neg_score": 0.2326, "inbatch_pos_score": 0.8584, "learning_rate": 2.2944444444444446e-05, "loss": 3.4629, "norm_diff": 0.0939, "norm_loss": 0.0, "num_token_doc": 67.0721, "num_token_overlap": 14.6315, "num_token_query": 37.2903, "num_token_union": 65.5319, "num_word_context": 202.5128, "num_word_doc": 50.0838, "num_word_query": 27.9339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4935.4471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2327, "query_norm": 1.3325, "queue_k_norm": 1.4253, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2903, "sent_len_1": 67.0721, "sent_len_max_0": 127.995, "sent_len_max_1": 190.7875, "stdk": 0.0485, "stdq": 0.0443, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4633, "doc_norm": 1.4217, "encoder_q-embeddings": 5094.666, "encoder_q-layer.0": 3697.2163, "encoder_q-layer.1": 4104.5415, "encoder_q-layer.10": 2280.3291, "encoder_q-layer.11": 5816.0332, "encoder_q-layer.2": 4747.7236, "encoder_q-layer.3": 5426.5781, "encoder_q-layer.4": 5139.5205, "encoder_q-layer.5": 4295.0503, "encoder_q-layer.6": 4027.261, "encoder_q-layer.7": 3937.9868, "encoder_q-layer.8": 2864.9075, "encoder_q-layer.9": 2310.9133, "epoch": 0.57, "inbatch_neg_score": 0.2359, "inbatch_pos_score": 0.8701, "learning_rate": 2.288888888888889e-05, "loss": 3.4633, "norm_diff": 0.0906, "norm_loss": 0.0, "num_token_doc": 67.0287, "num_token_overlap": 14.5598, "num_token_query": 37.3127, "num_token_union": 65.5235, "num_word_context": 202.4093, "num_word_doc": 49.9737, "num_word_query": 27.93, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6485.1472, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2355, "query_norm": 1.3311, "queue_k_norm": 1.4265, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3127, "sent_len_1": 67.0287, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.1863, "stdk": 0.0483, "stdq": 0.0441, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.4334, "doc_norm": 1.4246, "encoder_q-embeddings": 4130.9829, "encoder_q-layer.0": 3297.7651, "encoder_q-layer.1": 3417.425, "encoder_q-layer.10": 2461.4763, "encoder_q-layer.11": 5865.2871, "encoder_q-layer.2": 3630.6221, "encoder_q-layer.3": 3467.0059, "encoder_q-layer.4": 3568.3567, "encoder_q-layer.5": 3389.019, "encoder_q-layer.6": 3032.2129, "encoder_q-layer.7": 2844.4741, "encoder_q-layer.8": 2881.7034, "encoder_q-layer.9": 2378.5039, "epoch": 0.58, "inbatch_neg_score": 0.2329, "inbatch_pos_score": 0.8682, "learning_rate": 2.2833333333333334e-05, "loss": 3.4334, "norm_diff": 0.088, "norm_loss": 0.0, "num_token_doc": 67.125, "num_token_overlap": 14.6842, "num_token_query": 37.4915, "num_token_union": 65.6222, "num_word_context": 202.7612, "num_word_doc": 50.092, "num_word_query": 28.0882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5331.1134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2328, "query_norm": 1.3366, "queue_k_norm": 1.4286, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4915, "sent_len_1": 67.125, "sent_len_max_0": 128.0, "sent_len_max_1": 188.27, "stdk": 0.0484, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4463, "doc_norm": 1.4278, "encoder_q-embeddings": 3156.2749, "encoder_q-layer.0": 2202.4258, "encoder_q-layer.1": 2269.8049, "encoder_q-layer.10": 2243.8987, "encoder_q-layer.11": 5960.3076, "encoder_q-layer.2": 2336.8689, "encoder_q-layer.3": 2362.8511, "encoder_q-layer.4": 2377.8594, "encoder_q-layer.5": 2386.417, "encoder_q-layer.6": 2165.3159, "encoder_q-layer.7": 2305.7073, "encoder_q-layer.8": 2637.6177, "encoder_q-layer.9": 2314.7197, "epoch": 0.58, "inbatch_neg_score": 0.2314, "inbatch_pos_score": 0.8745, "learning_rate": 2.277777777777778e-05, "loss": 3.4463, "norm_diff": 0.1, "norm_loss": 0.0, "num_token_doc": 66.745, "num_token_overlap": 14.5427, "num_token_query": 37.1597, "num_token_union": 65.3163, "num_word_context": 202.3232, "num_word_doc": 49.8139, "num_word_query": 27.8267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4425.8204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2312, "query_norm": 1.3278, "queue_k_norm": 1.4267, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1597, "sent_len_1": 66.745, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.1875, "stdk": 0.0485, "stdq": 0.0443, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4738, "doc_norm": 1.4311, "encoder_q-embeddings": 2094.6572, "encoder_q-layer.0": 1420.3955, "encoder_q-layer.1": 1448.5077, "encoder_q-layer.10": 2451.6484, "encoder_q-layer.11": 5759.4165, "encoder_q-layer.2": 1622.6951, "encoder_q-layer.3": 1637.7656, "encoder_q-layer.4": 1677.6312, "encoder_q-layer.5": 1684.0292, "encoder_q-layer.6": 1922.3856, "encoder_q-layer.7": 2079.7078, "encoder_q-layer.8": 2536.7832, "encoder_q-layer.9": 2234.4282, "epoch": 0.58, "inbatch_neg_score": 0.2352, "inbatch_pos_score": 0.9102, "learning_rate": 2.2722222222222222e-05, "loss": 3.4738, "norm_diff": 0.0995, "norm_loss": 0.0, "num_token_doc": 66.4902, "num_token_overlap": 14.5519, "num_token_query": 37.3319, "num_token_union": 65.2471, "num_word_context": 202.0266, "num_word_doc": 49.6414, "num_word_query": 27.9638, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3711.3983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2354, "query_norm": 1.3316, "queue_k_norm": 1.4261, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3319, "sent_len_1": 66.4902, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1712, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4617, "doc_norm": 1.4323, "encoder_q-embeddings": 9550.749, "encoder_q-layer.0": 6863.3984, "encoder_q-layer.1": 8377.335, "encoder_q-layer.10": 2329.2041, "encoder_q-layer.11": 5630.2847, "encoder_q-layer.2": 10758.8877, "encoder_q-layer.3": 10634.0811, "encoder_q-layer.4": 11846.6045, "encoder_q-layer.5": 11662.9141, "encoder_q-layer.6": 10664.6533, "encoder_q-layer.7": 7180.7358, "encoder_q-layer.8": 4815.1763, "encoder_q-layer.9": 3166.7749, "epoch": 0.58, "inbatch_neg_score": 0.2292, "inbatch_pos_score": 0.8916, "learning_rate": 2.2666666666666668e-05, "loss": 3.4617, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.778, "num_token_overlap": 14.6101, "num_token_query": 37.3489, "num_token_union": 65.4108, "num_word_context": 202.5864, "num_word_doc": 49.8775, "num_word_query": 28.0046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12749.823, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2299, "query_norm": 1.3211, "queue_k_norm": 1.4286, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3489, "sent_len_1": 66.778, "sent_len_max_0": 128.0, "sent_len_max_1": 187.1562, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.4349, "doc_norm": 1.4304, "encoder_q-embeddings": 2507.9031, "encoder_q-layer.0": 1770.3656, "encoder_q-layer.1": 1782.1434, "encoder_q-layer.10": 2127.0352, "encoder_q-layer.11": 5614.3613, "encoder_q-layer.2": 1978.0815, "encoder_q-layer.3": 1932.5638, "encoder_q-layer.4": 2105.0549, "encoder_q-layer.5": 2103.1729, "encoder_q-layer.6": 2130.1604, "encoder_q-layer.7": 2283.5583, "encoder_q-layer.8": 2441.1465, "encoder_q-layer.9": 2218.1372, "epoch": 0.58, "inbatch_neg_score": 0.2272, "inbatch_pos_score": 0.8564, "learning_rate": 2.2611111111111113e-05, "loss": 3.4349, "norm_diff": 0.1205, "norm_loss": 0.0, "num_token_doc": 66.8351, "num_token_overlap": 14.6465, "num_token_query": 37.4748, "num_token_union": 65.4622, "num_word_context": 202.1198, "num_word_doc": 49.8663, "num_word_query": 28.0477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3981.5863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2263, "query_norm": 1.3099, "queue_k_norm": 1.4271, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4748, "sent_len_1": 66.8351, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.2862, "stdk": 0.0486, "stdq": 0.0437, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.4625, "doc_norm": 1.4285, "encoder_q-embeddings": 2181.8501, "encoder_q-layer.0": 1389.8301, "encoder_q-layer.1": 1451.0756, "encoder_q-layer.10": 2160.8157, "encoder_q-layer.11": 5764.3711, "encoder_q-layer.2": 1581.1127, "encoder_q-layer.3": 1618.9686, "encoder_q-layer.4": 1638.3326, "encoder_q-layer.5": 1695.8186, "encoder_q-layer.6": 1808.375, "encoder_q-layer.7": 1996.3187, "encoder_q-layer.8": 2391.0901, "encoder_q-layer.9": 2243.417, "epoch": 0.58, "inbatch_neg_score": 0.2315, "inbatch_pos_score": 0.8667, "learning_rate": 2.255555555555556e-05, "loss": 3.4625, "norm_diff": 0.1162, "norm_loss": 0.0, "num_token_doc": 66.8987, "num_token_overlap": 14.5678, "num_token_query": 37.1932, "num_token_union": 65.3568, "num_word_context": 202.5402, "num_word_doc": 49.9193, "num_word_query": 27.8581, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3743.6471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2294, "query_norm": 1.3123, "queue_k_norm": 1.4277, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1932, "sent_len_1": 66.8987, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4613, "stdk": 0.0485, "stdq": 0.0436, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.4645, "doc_norm": 1.4222, "encoder_q-embeddings": 2166.303, "encoder_q-layer.0": 1465.0507, "encoder_q-layer.1": 1492.8209, "encoder_q-layer.10": 2446.0642, "encoder_q-layer.11": 6411.2905, "encoder_q-layer.2": 1633.8245, "encoder_q-layer.3": 1756.8934, "encoder_q-layer.4": 1781.4473, "encoder_q-layer.5": 1866.1997, "encoder_q-layer.6": 2183.7314, "encoder_q-layer.7": 2336.1509, "encoder_q-layer.8": 2656.7583, "encoder_q-layer.9": 2312.2866, "epoch": 0.58, "inbatch_neg_score": 0.2294, "inbatch_pos_score": 0.8486, "learning_rate": 2.25e-05, "loss": 3.4645, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.9317, "num_token_overlap": 14.5838, "num_token_query": 37.1985, "num_token_union": 65.3825, "num_word_context": 202.5582, "num_word_doc": 49.9689, "num_word_query": 27.8621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4041.134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2295, "query_norm": 1.311, "queue_k_norm": 1.4263, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1985, "sent_len_1": 66.9317, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.985, "stdk": 0.0483, "stdq": 0.0436, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.4563, "doc_norm": 1.4303, "encoder_q-embeddings": 5493.4277, "encoder_q-layer.0": 4632.2324, "encoder_q-layer.1": 4737.6812, "encoder_q-layer.10": 2247.7798, "encoder_q-layer.11": 5666.4614, "encoder_q-layer.2": 5492.4502, "encoder_q-layer.3": 5454.9307, "encoder_q-layer.4": 5155.4863, "encoder_q-layer.5": 4910.7896, "encoder_q-layer.6": 4213.9634, "encoder_q-layer.7": 3350.9536, "encoder_q-layer.8": 2855.5215, "encoder_q-layer.9": 2245.5066, "epoch": 0.58, "inbatch_neg_score": 0.2314, "inbatch_pos_score": 0.8857, "learning_rate": 2.2444444444444447e-05, "loss": 3.4563, "norm_diff": 0.0906, "norm_loss": 0.0, "num_token_doc": 66.5386, "num_token_overlap": 14.5239, "num_token_query": 37.2626, "num_token_union": 65.2152, "num_word_context": 202.0279, "num_word_doc": 49.6177, "num_word_query": 27.9038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6908.0543, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2325, "query_norm": 1.3397, "queue_k_norm": 1.4276, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2626, "sent_len_1": 66.5386, "sent_len_max_0": 128.0, "sent_len_max_1": 190.24, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.4445, "doc_norm": 1.4262, "encoder_q-embeddings": 1464.1832, "encoder_q-layer.0": 980.301, "encoder_q-layer.1": 1015.7077, "encoder_q-layer.10": 1142.2083, "encoder_q-layer.11": 2813.2642, "encoder_q-layer.2": 1148.2292, "encoder_q-layer.3": 1266.8296, "encoder_q-layer.4": 1251.4709, "encoder_q-layer.5": 1213.5533, "encoder_q-layer.6": 1219.0631, "encoder_q-layer.7": 1303.7061, "encoder_q-layer.8": 1224.2075, "encoder_q-layer.9": 1038.8052, "epoch": 0.58, "inbatch_neg_score": 0.2348, "inbatch_pos_score": 0.8823, "learning_rate": 2.238888888888889e-05, "loss": 3.4445, "norm_diff": 0.0984, "norm_loss": 0.0, "num_token_doc": 66.6835, "num_token_overlap": 14.6081, "num_token_query": 37.2756, "num_token_union": 65.2935, "num_word_context": 202.1944, "num_word_doc": 49.7578, "num_word_query": 27.877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2123.5158, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2345, "query_norm": 1.3277, "queue_k_norm": 1.4276, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2756, "sent_len_1": 66.6835, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7537, "stdk": 0.0484, "stdq": 0.0441, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4344, "doc_norm": 1.4305, "encoder_q-embeddings": 1622.8884, "encoder_q-layer.0": 1171.0371, "encoder_q-layer.1": 1315.9338, "encoder_q-layer.10": 1156.1581, "encoder_q-layer.11": 2943.7905, "encoder_q-layer.2": 1484.9362, "encoder_q-layer.3": 1536.524, "encoder_q-layer.4": 1545.3962, "encoder_q-layer.5": 1492.4027, "encoder_q-layer.6": 1603.1655, "encoder_q-layer.7": 1583.2542, "encoder_q-layer.8": 1411.3716, "encoder_q-layer.9": 1151.2253, "epoch": 0.58, "inbatch_neg_score": 0.2368, "inbatch_pos_score": 0.877, "learning_rate": 2.2333333333333335e-05, "loss": 3.4344, "norm_diff": 0.0902, "norm_loss": 0.0, "num_token_doc": 66.8267, "num_token_overlap": 14.6463, "num_token_query": 37.4106, "num_token_union": 65.4456, "num_word_context": 202.1196, "num_word_doc": 49.9197, "num_word_query": 28.0028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2418.7811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2366, "query_norm": 1.3404, "queue_k_norm": 1.4302, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4106, "sent_len_1": 66.8267, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.4755, "doc_norm": 1.4263, "encoder_q-embeddings": 2455.4563, "encoder_q-layer.0": 1807.647, "encoder_q-layer.1": 2200.4351, "encoder_q-layer.10": 1139.1635, "encoder_q-layer.11": 2863.5747, "encoder_q-layer.2": 2514.3916, "encoder_q-layer.3": 2434.4016, "encoder_q-layer.4": 2453.5044, "encoder_q-layer.5": 2432.593, "encoder_q-layer.6": 1983.5919, "encoder_q-layer.7": 1713.0913, "encoder_q-layer.8": 1482.6108, "encoder_q-layer.9": 1151.2692, "epoch": 0.58, "inbatch_neg_score": 0.2352, "inbatch_pos_score": 0.8545, "learning_rate": 2.2277777777777778e-05, "loss": 3.4755, "norm_diff": 0.0957, "norm_loss": 0.0, "num_token_doc": 66.7842, "num_token_overlap": 14.5587, "num_token_query": 37.2831, "num_token_union": 65.4167, "num_word_context": 202.6953, "num_word_doc": 49.8517, "num_word_query": 27.92, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3202.2511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.235, "query_norm": 1.3306, "queue_k_norm": 1.4288, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2831, "sent_len_1": 66.7842, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4437, "stdk": 0.0484, "stdq": 0.0441, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.4465, "doc_norm": 1.4212, "encoder_q-embeddings": 1271.304, "encoder_q-layer.0": 892.5599, "encoder_q-layer.1": 932.7964, "encoder_q-layer.10": 1159.3104, "encoder_q-layer.11": 3093.3108, "encoder_q-layer.2": 1062.6172, "encoder_q-layer.3": 1086.6129, "encoder_q-layer.4": 1151.5222, "encoder_q-layer.5": 1052.9606, "encoder_q-layer.6": 1178.8623, "encoder_q-layer.7": 1219.0095, "encoder_q-layer.8": 1413.8262, "encoder_q-layer.9": 1185.6356, "epoch": 0.59, "inbatch_neg_score": 0.236, "inbatch_pos_score": 0.8599, "learning_rate": 2.2222222222222223e-05, "loss": 3.4465, "norm_diff": 0.0909, "norm_loss": 0.0, "num_token_doc": 66.8565, "num_token_overlap": 14.6022, "num_token_query": 37.487, "num_token_union": 65.5316, "num_word_context": 202.3691, "num_word_doc": 49.8882, "num_word_query": 28.0899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2147.5676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2358, "query_norm": 1.3303, "queue_k_norm": 1.4295, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.487, "sent_len_1": 66.8565, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0325, "stdk": 0.0482, "stdq": 0.0441, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 28.3224, "dev_samples_per_second": 2.26, "dev_steps_per_second": 0.035, "epoch": 0.59, "step": 60000, "test_accuracy": 93.34716796875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3969806432723999, "test_doc_norm": 1.4111305475234985, "test_inbatch_neg_score": 0.5828064680099487, "test_inbatch_pos_score": 1.5370218753814697, "test_loss": 0.3969806432723999, "test_loss_align": 1.0703150033950806, "test_loss_unif": 3.8788342475891113, "test_loss_unif_q@queue": 3.8788340091705322, "test_norm_diff": 0.04408515244722366, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.22792485356330872, "test_query_norm": 1.4552156925201416, "test_queue_k_norm": 1.4294852018356323, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04281211644411087, "test_stdq": 0.04321268945932388, "test_stdqueue_k": 0.04862023890018463, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.3224, "dev_samples_per_second": 2.26, "dev_steps_per_second": 0.035, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.35153, "eval_beir-arguana_recall@10": 0.59602, "eval_beir-arguana_recall@100": 0.90683, "eval_beir-arguana_recall@20": 0.72475, "eval_beir-avg_ndcg@10": 0.36595966666666674, "eval_beir-avg_recall@10": 0.4347131666666666, "eval_beir-avg_recall@100": 0.6189631666666666, "eval_beir-avg_recall@20": 0.4933349166666666, "eval_beir-cqadupstack_ndcg@10": 0.2618766666666667, "eval_beir-cqadupstack_recall@10": 0.3555516666666667, "eval_beir-cqadupstack_recall@100": 0.5799316666666666, "eval_beir-cqadupstack_recall@20": 0.4205291666666666, "eval_beir-fiqa_ndcg@10": 0.22406, "eval_beir-fiqa_recall@10": 0.28396, "eval_beir-fiqa_recall@100": 0.53399, "eval_beir-fiqa_recall@20": 0.34678, "eval_beir-nfcorpus_ndcg@10": 0.29253, "eval_beir-nfcorpus_recall@10": 0.14091, "eval_beir-nfcorpus_recall@100": 0.28236, "eval_beir-nfcorpus_recall@20": 0.18121, "eval_beir-nq_ndcg@10": 0.25173, "eval_beir-nq_recall@10": 0.41954, "eval_beir-nq_recall@100": 0.76333, "eval_beir-nq_recall@20": 0.53749, "eval_beir-quora_ndcg@10": 0.7549, "eval_beir-quora_recall@10": 0.87205, "eval_beir-quora_recall@100": 0.9737, "eval_beir-quora_recall@20": 0.91723, "eval_beir-scidocs_ndcg@10": 0.14091, "eval_beir-scidocs_recall@10": 0.15098, "eval_beir-scidocs_recall@100": 0.35292, "eval_beir-scidocs_recall@20": 0.19932, "eval_beir-scifact_ndcg@10": 0.64117, "eval_beir-scifact_recall@10": 0.78456, "eval_beir-scifact_recall@100": 0.90489, "eval_beir-scifact_recall@20": 0.82844, "eval_beir-trec-covid_ndcg@10": 0.5719, "eval_beir-trec-covid_recall@10": 0.612, "eval_beir-trec-covid_recall@100": 0.4534, "eval_beir-trec-covid_recall@20": 0.588, "eval_beir-webis-touche2020_ndcg@10": 0.16899, "eval_beir-webis-touche2020_recall@10": 0.13156, "eval_beir-webis-touche2020_recall@100": 0.43828, "eval_beir-webis-touche2020_recall@20": 0.1896, "eval_senteval-avg_sts": 0.7568099227004118, "eval_senteval-sickr_spearman": 0.7284004430005416, "eval_senteval-stsb_spearman": 0.785219402400282, "step": 60000, "test_accuracy": 93.34716796875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3969806432723999, "test_doc_norm": 1.4111305475234985, "test_inbatch_neg_score": 0.5828064680099487, "test_inbatch_pos_score": 1.5370218753814697, "test_loss": 0.3969806432723999, "test_loss_align": 1.0703150033950806, "test_loss_unif": 3.8788342475891113, "test_loss_unif_q@queue": 3.8788340091705322, "test_norm_diff": 0.04408515244722366, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.22792485356330872, "test_query_norm": 1.4552156925201416, "test_queue_k_norm": 1.4294852018356323, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04281211644411087, "test_stdq": 0.04321268945932388, "test_stdqueue_k": 0.04862023890018463, "test_stdqueue_q": 0.0 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4319, "doc_norm": 1.434, "encoder_q-embeddings": 1704.6909, "encoder_q-layer.0": 1108.1604, "encoder_q-layer.1": 1238.5807, "encoder_q-layer.10": 1164.1445, "encoder_q-layer.11": 2804.9417, "encoder_q-layer.2": 1477.8881, "encoder_q-layer.3": 1522.2502, "encoder_q-layer.4": 1669.9265, "encoder_q-layer.5": 1806.913, "encoder_q-layer.6": 1586.8713, "encoder_q-layer.7": 1592.1437, "encoder_q-layer.8": 1413.9148, "encoder_q-layer.9": 1190.3969, "epoch": 0.59, "inbatch_neg_score": 0.2389, "inbatch_pos_score": 0.894, "learning_rate": 2.216666666666667e-05, "loss": 3.4319, "norm_diff": 0.0835, "norm_loss": 0.0, "num_token_doc": 66.729, "num_token_overlap": 14.6068, "num_token_query": 37.3952, "num_token_union": 65.4043, "num_word_context": 202.3758, "num_word_doc": 49.7816, "num_word_query": 27.9915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2404.3179, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2378, "query_norm": 1.3505, "queue_k_norm": 1.4294, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3952, "sent_len_1": 66.729, "sent_len_max_0": 128.0, "sent_len_max_1": 188.785, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4526, "doc_norm": 1.4341, "encoder_q-embeddings": 1817.3423, "encoder_q-layer.0": 1234.1849, "encoder_q-layer.1": 1395.3798, "encoder_q-layer.10": 1151.8566, "encoder_q-layer.11": 2979.0789, "encoder_q-layer.2": 1512.9891, "encoder_q-layer.3": 1709.3964, "encoder_q-layer.4": 1942.4027, "encoder_q-layer.5": 2191.9187, "encoder_q-layer.6": 2353.2002, "encoder_q-layer.7": 1921.8682, "encoder_q-layer.8": 1490.902, "encoder_q-layer.9": 1147.0602, "epoch": 0.59, "inbatch_neg_score": 0.2337, "inbatch_pos_score": 0.8887, "learning_rate": 2.211111111111111e-05, "loss": 3.4526, "norm_diff": 0.081, "norm_loss": 0.0, "num_token_doc": 66.6803, "num_token_overlap": 14.5563, "num_token_query": 37.2959, "num_token_union": 65.3383, "num_word_context": 202.2782, "num_word_doc": 49.7345, "num_word_query": 27.9211, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2735.5161, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2339, "query_norm": 1.3531, "queue_k_norm": 1.4318, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2959, "sent_len_1": 66.6803, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7138, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4439, "doc_norm": 1.4311, "encoder_q-embeddings": 1794.9293, "encoder_q-layer.0": 1275.9551, "encoder_q-layer.1": 1384.8939, "encoder_q-layer.10": 1098.3, "encoder_q-layer.11": 2833.7517, "encoder_q-layer.2": 1604.9497, "encoder_q-layer.3": 1743.9774, "encoder_q-layer.4": 1948.2649, "encoder_q-layer.5": 1750.4153, "encoder_q-layer.6": 1668.2952, "encoder_q-layer.7": 1639.7174, "encoder_q-layer.8": 1503.5404, "encoder_q-layer.9": 1132.1785, "epoch": 0.59, "inbatch_neg_score": 0.2378, "inbatch_pos_score": 0.8906, "learning_rate": 2.2055555555555557e-05, "loss": 3.4439, "norm_diff": 0.0811, "norm_loss": 0.0, "num_token_doc": 66.8057, "num_token_overlap": 14.5455, "num_token_query": 37.3694, "num_token_union": 65.4465, "num_word_context": 202.6482, "num_word_doc": 49.8869, "num_word_query": 27.9822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2583.6035, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2363, "query_norm": 1.35, "queue_k_norm": 1.4316, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3694, "sent_len_1": 66.8057, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.29, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4363, "doc_norm": 1.4334, "encoder_q-embeddings": 1052.0444, "encoder_q-layer.0": 691.4917, "encoder_q-layer.1": 703.1049, "encoder_q-layer.10": 1197.2578, "encoder_q-layer.11": 2907.4624, "encoder_q-layer.2": 789.895, "encoder_q-layer.3": 794.6334, "encoder_q-layer.4": 836.7784, "encoder_q-layer.5": 853.5983, "encoder_q-layer.6": 954.4485, "encoder_q-layer.7": 1058.6741, "encoder_q-layer.8": 1297.8049, "encoder_q-layer.9": 1205.8756, "epoch": 0.59, "inbatch_neg_score": 0.2394, "inbatch_pos_score": 0.8789, "learning_rate": 2.2000000000000003e-05, "loss": 3.4363, "norm_diff": 0.1025, "norm_loss": 0.0, "num_token_doc": 66.6826, "num_token_overlap": 14.6748, "num_token_query": 37.4709, "num_token_union": 65.3119, "num_word_context": 202.1687, "num_word_doc": 49.7499, "num_word_query": 28.0433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1889.3128, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2404, "query_norm": 1.3309, "queue_k_norm": 1.4297, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4709, "sent_len_1": 66.6826, "sent_len_max_0": 127.995, "sent_len_max_1": 190.2887, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.4464, "doc_norm": 1.4365, "encoder_q-embeddings": 7973.4375, "encoder_q-layer.0": 5906.6182, "encoder_q-layer.1": 6526.8193, "encoder_q-layer.10": 1169.835, "encoder_q-layer.11": 2899.5613, "encoder_q-layer.2": 6892.3389, "encoder_q-layer.3": 7236.6807, "encoder_q-layer.4": 5947.5483, "encoder_q-layer.5": 5285.2256, "encoder_q-layer.6": 5699.166, "encoder_q-layer.7": 5419.9194, "encoder_q-layer.8": 2912.8044, "encoder_q-layer.9": 1304.059, "epoch": 0.59, "inbatch_neg_score": 0.2391, "inbatch_pos_score": 0.9058, "learning_rate": 2.1944444444444445e-05, "loss": 3.4464, "norm_diff": 0.0814, "norm_loss": 0.0, "num_token_doc": 66.7251, "num_token_overlap": 14.5651, "num_token_query": 37.2593, "num_token_union": 65.3644, "num_word_context": 202.2246, "num_word_doc": 49.8238, "num_word_query": 27.8942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8419.6714, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2386, "query_norm": 1.3551, "queue_k_norm": 1.4301, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2593, "sent_len_1": 66.7251, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8587, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.4464, "doc_norm": 1.4312, "encoder_q-embeddings": 1818.868, "encoder_q-layer.0": 1274.127, "encoder_q-layer.1": 1559.97, "encoder_q-layer.10": 1110.6006, "encoder_q-layer.11": 2899.699, "encoder_q-layer.2": 1830.6068, "encoder_q-layer.3": 1800.6261, "encoder_q-layer.4": 1996.5995, "encoder_q-layer.5": 1611.4254, "encoder_q-layer.6": 1542.5214, "encoder_q-layer.7": 1287.0402, "encoder_q-layer.8": 1304.6807, "encoder_q-layer.9": 1128.1792, "epoch": 0.59, "inbatch_neg_score": 0.2405, "inbatch_pos_score": 0.8682, "learning_rate": 2.188888888888889e-05, "loss": 3.4464, "norm_diff": 0.113, "norm_loss": 0.0, "num_token_doc": 66.9153, "num_token_overlap": 14.5221, "num_token_query": 37.1351, "num_token_union": 65.3705, "num_word_context": 202.3756, "num_word_doc": 49.9419, "num_word_query": 27.7711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2505.9414, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2395, "query_norm": 1.3182, "queue_k_norm": 1.4323, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1351, "sent_len_1": 66.9153, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1012, "stdk": 0.0485, "stdq": 0.0436, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.4227, "doc_norm": 1.4242, "encoder_q-embeddings": 2445.7446, "encoder_q-layer.0": 1824.5548, "encoder_q-layer.1": 2091.7812, "encoder_q-layer.10": 1203.2064, "encoder_q-layer.11": 3091.321, "encoder_q-layer.2": 2455.3081, "encoder_q-layer.3": 2810.6169, "encoder_q-layer.4": 2561.3782, "encoder_q-layer.5": 2000.8224, "encoder_q-layer.6": 2128.2507, "encoder_q-layer.7": 1939.5636, "encoder_q-layer.8": 1521.8767, "encoder_q-layer.9": 1237.5115, "epoch": 0.59, "inbatch_neg_score": 0.2444, "inbatch_pos_score": 0.8623, "learning_rate": 2.1833333333333333e-05, "loss": 3.4227, "norm_diff": 0.1027, "norm_loss": 0.0, "num_token_doc": 67.0449, "num_token_overlap": 14.6757, "num_token_query": 37.4786, "num_token_union": 65.5802, "num_word_context": 202.3037, "num_word_doc": 50.0503, "num_word_query": 28.0528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3271.2133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.243, "query_norm": 1.3215, "queue_k_norm": 1.4336, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4786, "sent_len_1": 67.0449, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1413, "stdk": 0.0482, "stdq": 0.0437, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.4329, "doc_norm": 1.4278, "encoder_q-embeddings": 975.4393, "encoder_q-layer.0": 650.4205, "encoder_q-layer.1": 680.1886, "encoder_q-layer.10": 1081.7003, "encoder_q-layer.11": 2766.9604, "encoder_q-layer.2": 760.7114, "encoder_q-layer.3": 795.2277, "encoder_q-layer.4": 819.7654, "encoder_q-layer.5": 813.0114, "encoder_q-layer.6": 927.6866, "encoder_q-layer.7": 1016.0007, "encoder_q-layer.8": 1208.793, "encoder_q-layer.9": 1062.9507, "epoch": 0.59, "inbatch_neg_score": 0.2452, "inbatch_pos_score": 0.9082, "learning_rate": 2.177777777777778e-05, "loss": 3.4329, "norm_diff": 0.0822, "norm_loss": 0.0, "num_token_doc": 66.6286, "num_token_overlap": 14.4958, "num_token_query": 37.2434, "num_token_union": 65.3052, "num_word_context": 202.1377, "num_word_doc": 49.7322, "num_word_query": 27.8738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1772.0534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2451, "query_norm": 1.3456, "queue_k_norm": 1.4324, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2434, "sent_len_1": 66.6286, "sent_len_max_0": 127.995, "sent_len_max_1": 187.3413, "stdk": 0.0484, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4295, "doc_norm": 1.433, "encoder_q-embeddings": 1881.1035, "encoder_q-layer.0": 1452.1713, "encoder_q-layer.1": 1538.9321, "encoder_q-layer.10": 1133.6184, "encoder_q-layer.11": 2766.6792, "encoder_q-layer.2": 1694.0775, "encoder_q-layer.3": 1715.0121, "encoder_q-layer.4": 1963.7072, "encoder_q-layer.5": 1839.3187, "encoder_q-layer.6": 1767.6082, "encoder_q-layer.7": 1934.6544, "encoder_q-layer.8": 1737.8689, "encoder_q-layer.9": 1202.1444, "epoch": 0.59, "inbatch_neg_score": 0.2414, "inbatch_pos_score": 0.8906, "learning_rate": 2.1722222222222225e-05, "loss": 3.4295, "norm_diff": 0.1011, "norm_loss": 0.0, "num_token_doc": 66.9477, "num_token_overlap": 14.5625, "num_token_query": 37.2224, "num_token_union": 65.4732, "num_word_context": 202.5164, "num_word_doc": 49.9902, "num_word_query": 27.8677, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2698.4883, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2406, "query_norm": 1.3319, "queue_k_norm": 1.434, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2224, "sent_len_1": 66.9477, "sent_len_max_0": 128.0, "sent_len_max_1": 187.1238, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.4631, "doc_norm": 1.434, "encoder_q-embeddings": 1314.4542, "encoder_q-layer.0": 895.7803, "encoder_q-layer.1": 964.962, "encoder_q-layer.10": 1248.5699, "encoder_q-layer.11": 2777.9641, "encoder_q-layer.2": 1183.5443, "encoder_q-layer.3": 1215.2029, "encoder_q-layer.4": 1279.214, "encoder_q-layer.5": 1276.5526, "encoder_q-layer.6": 1238.8644, "encoder_q-layer.7": 1263.1302, "encoder_q-layer.8": 1346.4474, "encoder_q-layer.9": 1171.1462, "epoch": 0.6, "inbatch_neg_score": 0.252, "inbatch_pos_score": 0.9062, "learning_rate": 2.1666666666666667e-05, "loss": 3.4631, "norm_diff": 0.0615, "norm_loss": 0.0, "num_token_doc": 66.7255, "num_token_overlap": 14.5888, "num_token_query": 37.2992, "num_token_union": 65.328, "num_word_context": 202.629, "num_word_doc": 49.8217, "num_word_query": 27.9452, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2077.2371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2522, "query_norm": 1.3724, "queue_k_norm": 1.4332, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2992, "sent_len_1": 66.7255, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.5475, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.447, "doc_norm": 1.4362, "encoder_q-embeddings": 1300.9583, "encoder_q-layer.0": 864.6321, "encoder_q-layer.1": 1010.6966, "encoder_q-layer.10": 1051.7566, "encoder_q-layer.11": 2892.6831, "encoder_q-layer.2": 1150.429, "encoder_q-layer.3": 1234.5409, "encoder_q-layer.4": 1384.9869, "encoder_q-layer.5": 1408.915, "encoder_q-layer.6": 1269.2451, "encoder_q-layer.7": 1146.7191, "encoder_q-layer.8": 1275.4336, "encoder_q-layer.9": 1092.5632, "epoch": 0.6, "inbatch_neg_score": 0.2546, "inbatch_pos_score": 0.9175, "learning_rate": 2.1611111111111113e-05, "loss": 3.447, "norm_diff": 0.083, "norm_loss": 0.0, "num_token_doc": 66.6523, "num_token_overlap": 14.5544, "num_token_query": 37.3456, "num_token_union": 65.306, "num_word_context": 202.3305, "num_word_doc": 49.7171, "num_word_query": 27.954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2061.1212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2561, "query_norm": 1.3532, "queue_k_norm": 1.4344, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3456, "sent_len_1": 66.6523, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.4238, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4334, "doc_norm": 1.4314, "encoder_q-embeddings": 986.7968, "encoder_q-layer.0": 661.3712, "encoder_q-layer.1": 662.4185, "encoder_q-layer.10": 1142.9714, "encoder_q-layer.11": 3037.4976, "encoder_q-layer.2": 744.8915, "encoder_q-layer.3": 770.8619, "encoder_q-layer.4": 776.7513, "encoder_q-layer.5": 764.7445, "encoder_q-layer.6": 876.1525, "encoder_q-layer.7": 993.8372, "encoder_q-layer.8": 1191.9327, "encoder_q-layer.9": 1116.6667, "epoch": 0.6, "inbatch_neg_score": 0.259, "inbatch_pos_score": 0.8975, "learning_rate": 2.1555555555555555e-05, "loss": 3.4334, "norm_diff": 0.0785, "norm_loss": 0.0, "num_token_doc": 66.6289, "num_token_overlap": 14.5127, "num_token_query": 37.2634, "num_token_union": 65.3363, "num_word_context": 202.308, "num_word_doc": 49.7405, "num_word_query": 27.9142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1768.4835, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2595, "query_norm": 1.3529, "queue_k_norm": 1.4368, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2634, "sent_len_1": 66.6289, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0712, "stdk": 0.0485, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.4363, "doc_norm": 1.4405, "encoder_q-embeddings": 1006.5327, "encoder_q-layer.0": 696.3225, "encoder_q-layer.1": 702.4572, "encoder_q-layer.10": 1054.6794, "encoder_q-layer.11": 2618.1633, "encoder_q-layer.2": 805.9098, "encoder_q-layer.3": 847.1161, "encoder_q-layer.4": 858.894, "encoder_q-layer.5": 897.805, "encoder_q-layer.6": 1001.9418, "encoder_q-layer.7": 1052.256, "encoder_q-layer.8": 1226.3442, "encoder_q-layer.9": 1075.2717, "epoch": 0.6, "inbatch_neg_score": 0.2638, "inbatch_pos_score": 0.9385, "learning_rate": 2.15e-05, "loss": 3.4363, "norm_diff": 0.0827, "norm_loss": 0.0, "num_token_doc": 66.6399, "num_token_overlap": 14.6186, "num_token_query": 37.4004, "num_token_union": 65.3428, "num_word_context": 202.0443, "num_word_doc": 49.7736, "num_word_query": 28.0009, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1735.6249, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2627, "query_norm": 1.3577, "queue_k_norm": 1.4365, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4004, "sent_len_1": 66.6399, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.6562, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.4349, "doc_norm": 1.4382, "encoder_q-embeddings": 1127.4889, "encoder_q-layer.0": 760.696, "encoder_q-layer.1": 801.3994, "encoder_q-layer.10": 1237.2257, "encoder_q-layer.11": 2836.9937, "encoder_q-layer.2": 923.718, "encoder_q-layer.3": 997.6824, "encoder_q-layer.4": 1009.8743, "encoder_q-layer.5": 1021.0933, "encoder_q-layer.6": 1021.157, "encoder_q-layer.7": 1076.7845, "encoder_q-layer.8": 1225.1041, "encoder_q-layer.9": 1121.2694, "epoch": 0.6, "inbatch_neg_score": 0.2698, "inbatch_pos_score": 0.9072, "learning_rate": 2.1444444444444443e-05, "loss": 3.4349, "norm_diff": 0.0652, "norm_loss": 0.0, "num_token_doc": 66.8462, "num_token_overlap": 14.615, "num_token_query": 37.4001, "num_token_union": 65.4624, "num_word_context": 202.1623, "num_word_doc": 49.876, "num_word_query": 27.9839, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1867.9037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2688, "query_norm": 1.373, "queue_k_norm": 1.4362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4001, "sent_len_1": 66.8462, "sent_len_max_0": 127.995, "sent_len_max_1": 189.635, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.457, "doc_norm": 1.4436, "encoder_q-embeddings": 1278.467, "encoder_q-layer.0": 891.9302, "encoder_q-layer.1": 955.5161, "encoder_q-layer.10": 1223.6494, "encoder_q-layer.11": 2965.4431, "encoder_q-layer.2": 1069.6099, "encoder_q-layer.3": 1138.5802, "encoder_q-layer.4": 1188.8287, "encoder_q-layer.5": 1244.0939, "encoder_q-layer.6": 1213.1304, "encoder_q-layer.7": 1180.1238, "encoder_q-layer.8": 1262.299, "encoder_q-layer.9": 1116.6182, "epoch": 0.6, "inbatch_neg_score": 0.2756, "inbatch_pos_score": 0.9116, "learning_rate": 2.138888888888889e-05, "loss": 3.457, "norm_diff": 0.068, "norm_loss": 0.0, "num_token_doc": 66.7003, "num_token_overlap": 14.5732, "num_token_query": 37.2834, "num_token_union": 65.3122, "num_word_context": 202.4464, "num_word_doc": 49.7589, "num_word_query": 27.9174, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2026.4401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2761, "query_norm": 1.3756, "queue_k_norm": 1.437, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2834, "sent_len_1": 66.7003, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.075, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.4089, "doc_norm": 1.4346, "encoder_q-embeddings": 1140.0349, "encoder_q-layer.0": 772.592, "encoder_q-layer.1": 804.4825, "encoder_q-layer.10": 1164.6287, "encoder_q-layer.11": 2750.0298, "encoder_q-layer.2": 946.1088, "encoder_q-layer.3": 988.8196, "encoder_q-layer.4": 1023.8488, "encoder_q-layer.5": 1017.9288, "encoder_q-layer.6": 1136.1493, "encoder_q-layer.7": 1177.8955, "encoder_q-layer.8": 1273.7861, "encoder_q-layer.9": 1142.046, "epoch": 0.6, "inbatch_neg_score": 0.2854, "inbatch_pos_score": 0.9316, "learning_rate": 2.1333333333333335e-05, "loss": 3.4089, "norm_diff": 0.0505, "norm_loss": 0.0, "num_token_doc": 66.8233, "num_token_overlap": 14.5697, "num_token_query": 37.2364, "num_token_union": 65.3646, "num_word_context": 202.2452, "num_word_doc": 49.8666, "num_word_query": 27.8871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1902.0718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2854, "query_norm": 1.3841, "queue_k_norm": 1.4379, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2364, "sent_len_1": 66.8233, "sent_len_max_0": 128.0, "sent_len_max_1": 189.67, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4308, "doc_norm": 1.4412, "encoder_q-embeddings": 3098.9626, "encoder_q-layer.0": 2202.0872, "encoder_q-layer.1": 2539.1704, "encoder_q-layer.10": 2586.4919, "encoder_q-layer.11": 5651.3208, "encoder_q-layer.2": 3067.3105, "encoder_q-layer.3": 3545.6189, "encoder_q-layer.4": 3680.6848, "encoder_q-layer.5": 3652.5391, "encoder_q-layer.6": 4028.1096, "encoder_q-layer.7": 3912.2429, "encoder_q-layer.8": 2926.2166, "encoder_q-layer.9": 2305.6265, "epoch": 0.6, "inbatch_neg_score": 0.2873, "inbatch_pos_score": 0.9399, "learning_rate": 2.127777777777778e-05, "loss": 3.4308, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.8703, "num_token_overlap": 14.5819, "num_token_query": 37.2185, "num_token_union": 65.3777, "num_word_context": 202.1993, "num_word_doc": 49.8915, "num_word_query": 27.8666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5183.6072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.3876, "queue_k_norm": 1.4399, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2185, "sent_len_1": 66.8703, "sent_len_max_0": 128.0, "sent_len_max_1": 189.325, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.4111, "doc_norm": 1.4427, "encoder_q-embeddings": 3837.6665, "encoder_q-layer.0": 2517.4253, "encoder_q-layer.1": 2695.2959, "encoder_q-layer.10": 2596.8958, "encoder_q-layer.11": 6094.915, "encoder_q-layer.2": 3080.1641, "encoder_q-layer.3": 3322.415, "encoder_q-layer.4": 3797.093, "encoder_q-layer.5": 3686.2795, "encoder_q-layer.6": 3553.0554, "encoder_q-layer.7": 3367.2617, "encoder_q-layer.8": 3075.5554, "encoder_q-layer.9": 2422.5261, "epoch": 0.6, "inbatch_neg_score": 0.2913, "inbatch_pos_score": 0.9087, "learning_rate": 2.1222222222222223e-05, "loss": 3.4111, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.9599, "num_token_overlap": 14.6464, "num_token_query": 37.4599, "num_token_union": 65.5828, "num_word_context": 202.4929, "num_word_doc": 50.0101, "num_word_query": 28.0692, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5278.2174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2893, "query_norm": 1.3635, "queue_k_norm": 1.444, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4599, "sent_len_1": 66.9599, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.3438, "stdk": 0.0487, "stdq": 0.0437, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4313, "doc_norm": 1.4432, "encoder_q-embeddings": 2098.7661, "encoder_q-layer.0": 1405.5559, "encoder_q-layer.1": 1478.7167, "encoder_q-layer.10": 2226.3386, "encoder_q-layer.11": 5622.9761, "encoder_q-layer.2": 1639.0901, "encoder_q-layer.3": 1701.4415, "encoder_q-layer.4": 1792.3225, "encoder_q-layer.5": 1809.9806, "encoder_q-layer.6": 1909.6746, "encoder_q-layer.7": 2050.7207, "encoder_q-layer.8": 2406.8589, "encoder_q-layer.9": 2217.4641, "epoch": 0.6, "inbatch_neg_score": 0.2878, "inbatch_pos_score": 0.937, "learning_rate": 2.116666666666667e-05, "loss": 3.4313, "norm_diff": 0.0759, "norm_loss": 0.0, "num_token_doc": 66.9893, "num_token_overlap": 14.5916, "num_token_query": 37.3678, "num_token_union": 65.5267, "num_word_context": 202.5876, "num_word_doc": 50.0056, "num_word_query": 27.9709, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3619.033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2891, "query_norm": 1.3673, "queue_k_norm": 1.443, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3678, "sent_len_1": 66.9893, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9538, "stdk": 0.0487, "stdq": 0.0438, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.4301, "doc_norm": 1.4413, "encoder_q-embeddings": 1943.6318, "encoder_q-layer.0": 1275.0912, "encoder_q-layer.1": 1330.3542, "encoder_q-layer.10": 2385.5737, "encoder_q-layer.11": 5586.3682, "encoder_q-layer.2": 1500.3273, "encoder_q-layer.3": 1507.4021, "encoder_q-layer.4": 1584.8453, "encoder_q-layer.5": 1606.3567, "encoder_q-layer.6": 1861.0333, "encoder_q-layer.7": 2007.9622, "encoder_q-layer.8": 2535.7202, "encoder_q-layer.9": 2190.771, "epoch": 0.61, "inbatch_neg_score": 0.2929, "inbatch_pos_score": 0.9331, "learning_rate": 2.111111111111111e-05, "loss": 3.4301, "norm_diff": 0.075, "norm_loss": 0.0, "num_token_doc": 66.5831, "num_token_overlap": 14.5385, "num_token_query": 37.2731, "num_token_union": 65.3077, "num_word_context": 202.4425, "num_word_doc": 49.7005, "num_word_query": 27.8723, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3541.6913, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.3663, "queue_k_norm": 1.4437, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2731, "sent_len_1": 66.5831, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5375, "stdk": 0.0485, "stdq": 0.0439, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4237, "doc_norm": 1.4442, "encoder_q-embeddings": 2394.6851, "encoder_q-layer.0": 1603.9901, "encoder_q-layer.1": 1658.5702, "encoder_q-layer.10": 2361.3611, "encoder_q-layer.11": 5620.8843, "encoder_q-layer.2": 1907.8805, "encoder_q-layer.3": 2094.2869, "encoder_q-layer.4": 2269.7146, "encoder_q-layer.5": 2311.3, "encoder_q-layer.6": 2350.3047, "encoder_q-layer.7": 2340.3938, "encoder_q-layer.8": 2491.8823, "encoder_q-layer.9": 2297.3223, "epoch": 0.61, "inbatch_neg_score": 0.2903, "inbatch_pos_score": 0.9277, "learning_rate": 2.1055555555555556e-05, "loss": 3.4237, "norm_diff": 0.0795, "norm_loss": 0.0, "num_token_doc": 66.6205, "num_token_overlap": 14.569, "num_token_query": 37.2317, "num_token_union": 65.2578, "num_word_context": 201.6357, "num_word_doc": 49.7581, "num_word_query": 27.9006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3912.0954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3646, "queue_k_norm": 1.4449, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2317, "sent_len_1": 66.6205, "sent_len_max_0": 128.0, "sent_len_max_1": 186.82, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4266, "doc_norm": 1.4543, "encoder_q-embeddings": 3275.7439, "encoder_q-layer.0": 2334.2595, "encoder_q-layer.1": 2628.8772, "encoder_q-layer.10": 2265.4338, "encoder_q-layer.11": 5845.0723, "encoder_q-layer.2": 3291.4023, "encoder_q-layer.3": 3300.5752, "encoder_q-layer.4": 3458.7585, "encoder_q-layer.5": 3167.1304, "encoder_q-layer.6": 3092.3511, "encoder_q-layer.7": 3003.5107, "encoder_q-layer.8": 2912.6326, "encoder_q-layer.9": 2391.8459, "epoch": 0.61, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 0.9424, "learning_rate": 2.1e-05, "loss": 3.4266, "norm_diff": 0.0745, "norm_loss": 0.0, "num_token_doc": 66.8798, "num_token_overlap": 14.5813, "num_token_query": 37.4544, "num_token_union": 65.4775, "num_word_context": 202.6312, "num_word_doc": 49.9137, "num_word_query": 28.0747, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4826.3879, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3798, "queue_k_norm": 1.4459, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4544, "sent_len_1": 66.8798, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0513, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.4348, "doc_norm": 1.4428, "encoder_q-embeddings": 49168.1992, "encoder_q-layer.0": 34218.8711, "encoder_q-layer.1": 39731.2227, "encoder_q-layer.10": 2235.594, "encoder_q-layer.11": 5497.6812, "encoder_q-layer.2": 46530.4297, "encoder_q-layer.3": 46126.3633, "encoder_q-layer.4": 50921.9727, "encoder_q-layer.5": 53439.5703, "encoder_q-layer.6": 44997.8164, "encoder_q-layer.7": 29861.3848, "encoder_q-layer.8": 15369.7109, "encoder_q-layer.9": 4527.0234, "epoch": 0.61, "inbatch_neg_score": 0.2764, "inbatch_pos_score": 0.9062, "learning_rate": 2.0944444444444445e-05, "loss": 3.4348, "norm_diff": 0.0959, "norm_loss": 0.0, "num_token_doc": 66.6122, "num_token_overlap": 14.4945, "num_token_query": 37.1254, "num_token_union": 65.2241, "num_word_context": 202.2125, "num_word_doc": 49.7372, "num_word_query": 27.7715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 56880.4902, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.2759, "query_norm": 1.347, "queue_k_norm": 1.4466, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1254, "sent_len_1": 66.6122, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.86, "stdk": 0.0485, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.417, "doc_norm": 1.4477, "encoder_q-embeddings": 2693.0034, "encoder_q-layer.0": 1813.2834, "encoder_q-layer.1": 2009.9567, "encoder_q-layer.10": 2393.571, "encoder_q-layer.11": 5959.6519, "encoder_q-layer.2": 2303.1772, "encoder_q-layer.3": 2252.948, "encoder_q-layer.4": 2462.4993, "encoder_q-layer.5": 2349.5369, "encoder_q-layer.6": 2557.8579, "encoder_q-layer.7": 2505.5083, "encoder_q-layer.8": 2564.938, "encoder_q-layer.9": 2358.7786, "epoch": 0.61, "inbatch_neg_score": 0.2802, "inbatch_pos_score": 0.9258, "learning_rate": 2.088888888888889e-05, "loss": 3.417, "norm_diff": 0.0756, "norm_loss": 0.0, "num_token_doc": 66.8283, "num_token_overlap": 14.589, "num_token_query": 37.1904, "num_token_union": 65.2919, "num_word_context": 202.2316, "num_word_doc": 49.9192, "num_word_query": 27.835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4270.4834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2795, "query_norm": 1.3721, "queue_k_norm": 1.4471, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1904, "sent_len_1": 66.8283, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9162, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.4462, "doc_norm": 1.4497, "encoder_q-embeddings": 3751.4863, "encoder_q-layer.0": 2507.9551, "encoder_q-layer.1": 2815.4575, "encoder_q-layer.10": 2226.3745, "encoder_q-layer.11": 5567.7456, "encoder_q-layer.2": 3384.2483, "encoder_q-layer.3": 3517.3828, "encoder_q-layer.4": 4375.3472, "encoder_q-layer.5": 4245.4575, "encoder_q-layer.6": 4057.3213, "encoder_q-layer.7": 3286.0276, "encoder_q-layer.8": 2682.8735, "encoder_q-layer.9": 2279.895, "epoch": 0.61, "inbatch_neg_score": 0.2854, "inbatch_pos_score": 0.9404, "learning_rate": 2.0833333333333336e-05, "loss": 3.4462, "norm_diff": 0.0758, "norm_loss": 0.0, "num_token_doc": 66.4813, "num_token_overlap": 14.5379, "num_token_query": 37.3893, "num_token_union": 65.3233, "num_word_context": 202.3559, "num_word_doc": 49.6077, "num_word_query": 28.0037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5352.6215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3739, "queue_k_norm": 1.4474, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3893, "sent_len_1": 66.4813, "sent_len_max_0": 128.0, "sent_len_max_1": 190.36, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4342, "doc_norm": 1.446, "encoder_q-embeddings": 1301.3059, "encoder_q-layer.0": 968.3423, "encoder_q-layer.1": 1025.6418, "encoder_q-layer.10": 1042.5574, "encoder_q-layer.11": 3007.1575, "encoder_q-layer.2": 1119.9878, "encoder_q-layer.3": 1244.5305, "encoder_q-layer.4": 1295.4847, "encoder_q-layer.5": 999.5643, "encoder_q-layer.6": 1038.4242, "encoder_q-layer.7": 1222.4019, "encoder_q-layer.8": 1197.7314, "encoder_q-layer.9": 1088.7245, "epoch": 0.61, "inbatch_neg_score": 0.2882, "inbatch_pos_score": 0.9448, "learning_rate": 2.077777777777778e-05, "loss": 3.4342, "norm_diff": 0.0816, "norm_loss": 0.0, "num_token_doc": 66.7328, "num_token_overlap": 14.5637, "num_token_query": 37.2272, "num_token_union": 65.2821, "num_word_context": 202.2048, "num_word_doc": 49.8076, "num_word_query": 27.8751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2104.4973, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2886, "query_norm": 1.3644, "queue_k_norm": 1.4487, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2272, "sent_len_1": 66.7328, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4638, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.4256, "doc_norm": 1.4468, "encoder_q-embeddings": 1615.1062, "encoder_q-layer.0": 1163.7632, "encoder_q-layer.1": 1336.2979, "encoder_q-layer.10": 1132.2129, "encoder_q-layer.11": 3130.8726, "encoder_q-layer.2": 1634.1423, "encoder_q-layer.3": 1696.0284, "encoder_q-layer.4": 1803.4458, "encoder_q-layer.5": 1749.8931, "encoder_q-layer.6": 1630.7145, "encoder_q-layer.7": 1551.6523, "encoder_q-layer.8": 1378.2147, "encoder_q-layer.9": 1116.3851, "epoch": 0.61, "inbatch_neg_score": 0.2866, "inbatch_pos_score": 0.9106, "learning_rate": 2.0722222222222224e-05, "loss": 3.4256, "norm_diff": 0.1075, "norm_loss": 0.0, "num_token_doc": 66.5667, "num_token_overlap": 14.5484, "num_token_query": 37.2994, "num_token_union": 65.248, "num_word_context": 201.9783, "num_word_doc": 49.6806, "num_word_query": 27.9231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2551.0902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2859, "query_norm": 1.3393, "queue_k_norm": 1.4479, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2994, "sent_len_1": 66.5667, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.0, "stdk": 0.0485, "stdq": 0.0437, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4054, "doc_norm": 1.4545, "encoder_q-embeddings": 996.7964, "encoder_q-layer.0": 651.8854, "encoder_q-layer.1": 687.0096, "encoder_q-layer.10": 1093.0657, "encoder_q-layer.11": 3140.7336, "encoder_q-layer.2": 748.6811, "encoder_q-layer.3": 754.4879, "encoder_q-layer.4": 782.8444, "encoder_q-layer.5": 804.7803, "encoder_q-layer.6": 920.3378, "encoder_q-layer.7": 981.692, "encoder_q-layer.8": 1183.4406, "encoder_q-layer.9": 1100.2179, "epoch": 0.61, "inbatch_neg_score": 0.2861, "inbatch_pos_score": 0.9287, "learning_rate": 2.0666666666666666e-05, "loss": 3.4054, "norm_diff": 0.0944, "norm_loss": 0.0, "num_token_doc": 66.6812, "num_token_overlap": 14.6041, "num_token_query": 37.2593, "num_token_union": 65.2288, "num_word_context": 201.8558, "num_word_doc": 49.7825, "num_word_query": 27.8816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1857.4, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2869, "query_norm": 1.3601, "queue_k_norm": 1.4507, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2593, "sent_len_1": 66.6812, "sent_len_max_0": 128.0, "sent_len_max_1": 188.385, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4408, "doc_norm": 1.4477, "encoder_q-embeddings": 1632.3286, "encoder_q-layer.0": 1039.7299, "encoder_q-layer.1": 1194.5441, "encoder_q-layer.10": 1201.0327, "encoder_q-layer.11": 2979.6685, "encoder_q-layer.2": 1442.4113, "encoder_q-layer.3": 1480.5605, "encoder_q-layer.4": 1678.3412, "encoder_q-layer.5": 1752.6238, "encoder_q-layer.6": 1563.9989, "encoder_q-layer.7": 1320.4658, "encoder_q-layer.8": 1369.8645, "encoder_q-layer.9": 1175.2725, "epoch": 0.61, "inbatch_neg_score": 0.2838, "inbatch_pos_score": 0.916, "learning_rate": 2.0611111111111112e-05, "loss": 3.4408, "norm_diff": 0.082, "norm_loss": 0.0, "num_token_doc": 66.5699, "num_token_overlap": 14.5081, "num_token_query": 37.2346, "num_token_union": 65.287, "num_word_context": 202.0562, "num_word_doc": 49.6501, "num_word_query": 27.872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2406.8274, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2837, "query_norm": 1.3657, "queue_k_norm": 1.4503, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2346, "sent_len_1": 66.5699, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.1525, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.4342, "doc_norm": 1.4568, "encoder_q-embeddings": 1209.2565, "encoder_q-layer.0": 822.3146, "encoder_q-layer.1": 873.6108, "encoder_q-layer.10": 1183.8469, "encoder_q-layer.11": 3000.5491, "encoder_q-layer.2": 1002.3441, "encoder_q-layer.3": 1040.203, "encoder_q-layer.4": 1066.0522, "encoder_q-layer.5": 1032.6046, "encoder_q-layer.6": 1145.0748, "encoder_q-layer.7": 1108.3876, "encoder_q-layer.8": 1264.6855, "encoder_q-layer.9": 1141.4569, "epoch": 0.62, "inbatch_neg_score": 0.2855, "inbatch_pos_score": 0.939, "learning_rate": 2.0555555555555555e-05, "loss": 3.4342, "norm_diff": 0.105, "norm_loss": 0.0, "num_token_doc": 66.9099, "num_token_overlap": 14.5339, "num_token_query": 37.2313, "num_token_union": 65.4497, "num_word_context": 202.5863, "num_word_doc": 49.9231, "num_word_query": 27.8674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1993.5917, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3518, "queue_k_norm": 1.4505, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2313, "sent_len_1": 66.9099, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9988, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.4359, "doc_norm": 1.4506, "encoder_q-embeddings": 1057.2242, "encoder_q-layer.0": 710.8634, "encoder_q-layer.1": 724.5639, "encoder_q-layer.10": 1187.98, "encoder_q-layer.11": 2931.4873, "encoder_q-layer.2": 807.2329, "encoder_q-layer.3": 831.2141, "encoder_q-layer.4": 848.5958, "encoder_q-layer.5": 847.7016, "encoder_q-layer.6": 898.2676, "encoder_q-layer.7": 986.0998, "encoder_q-layer.8": 1234.1665, "encoder_q-layer.9": 1164.5226, "epoch": 0.62, "inbatch_neg_score": 0.28, "inbatch_pos_score": 0.8931, "learning_rate": 2.05e-05, "loss": 3.4359, "norm_diff": 0.1072, "norm_loss": 0.0, "num_token_doc": 66.6835, "num_token_overlap": 14.5139, "num_token_query": 37.314, "num_token_union": 65.3768, "num_word_context": 202.1782, "num_word_doc": 49.7721, "num_word_query": 27.9301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1887.7986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.28, "query_norm": 1.3435, "queue_k_norm": 1.4525, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.314, "sent_len_1": 66.6835, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.89, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4465, "doc_norm": 1.4491, "encoder_q-embeddings": 3904.2969, "encoder_q-layer.0": 3441.3594, "encoder_q-layer.1": 4531.957, "encoder_q-layer.10": 1141.9308, "encoder_q-layer.11": 3139.9182, "encoder_q-layer.2": 5744.8521, "encoder_q-layer.3": 4896.1069, "encoder_q-layer.4": 5213.4512, "encoder_q-layer.5": 3959.0688, "encoder_q-layer.6": 3193.2144, "encoder_q-layer.7": 2818.3662, "encoder_q-layer.8": 2264.5554, "encoder_q-layer.9": 1335.7423, "epoch": 0.62, "inbatch_neg_score": 0.2857, "inbatch_pos_score": 0.9292, "learning_rate": 2.0444444444444446e-05, "loss": 3.4465, "norm_diff": 0.1007, "norm_loss": 0.0, "num_token_doc": 66.6071, "num_token_overlap": 14.5045, "num_token_query": 37.2121, "num_token_union": 65.2575, "num_word_context": 202.1106, "num_word_doc": 49.7214, "num_word_query": 27.8521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5889.9853, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2861, "query_norm": 1.3484, "queue_k_norm": 1.4518, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2121, "sent_len_1": 66.6071, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9137, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4313, "doc_norm": 1.4574, "encoder_q-embeddings": 2398.3269, "encoder_q-layer.0": 1659.6973, "encoder_q-layer.1": 2000.7771, "encoder_q-layer.10": 1310.2605, "encoder_q-layer.11": 2952.7859, "encoder_q-layer.2": 2359.863, "encoder_q-layer.3": 2356.1882, "encoder_q-layer.4": 2405.6692, "encoder_q-layer.5": 2167.1467, "encoder_q-layer.6": 1762.5931, "encoder_q-layer.7": 1633.2535, "encoder_q-layer.8": 1523.4968, "encoder_q-layer.9": 1177.5377, "epoch": 0.62, "inbatch_neg_score": 0.28, "inbatch_pos_score": 0.9346, "learning_rate": 2.0388888888888892e-05, "loss": 3.4313, "norm_diff": 0.1174, "norm_loss": 0.0, "num_token_doc": 66.7064, "num_token_overlap": 14.576, "num_token_query": 37.2309, "num_token_union": 65.2655, "num_word_context": 201.9219, "num_word_doc": 49.7489, "num_word_query": 27.8707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3057.5469, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.28, "query_norm": 1.34, "queue_k_norm": 1.4509, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2309, "sent_len_1": 66.7064, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5462, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.4256, "doc_norm": 1.4498, "encoder_q-embeddings": 3613.9304, "encoder_q-layer.0": 2779.5129, "encoder_q-layer.1": 3114.5764, "encoder_q-layer.10": 1188.9021, "encoder_q-layer.11": 2934.2715, "encoder_q-layer.2": 3627.7346, "encoder_q-layer.3": 3753.3218, "encoder_q-layer.4": 3716.8679, "encoder_q-layer.5": 2863.1099, "encoder_q-layer.6": 2402.3169, "encoder_q-layer.7": 2442.4639, "encoder_q-layer.8": 2217.551, "encoder_q-layer.9": 1318.0573, "epoch": 0.62, "inbatch_neg_score": 0.275, "inbatch_pos_score": 0.9199, "learning_rate": 2.0333333333333334e-05, "loss": 3.4256, "norm_diff": 0.1044, "norm_loss": 0.0, "num_token_doc": 66.8077, "num_token_overlap": 14.6024, "num_token_query": 37.3548, "num_token_union": 65.3952, "num_word_context": 202.4082, "num_word_doc": 49.8739, "num_word_query": 27.9768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4412.0489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2751, "query_norm": 1.3454, "queue_k_norm": 1.4521, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3548, "sent_len_1": 66.8077, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.7325, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.4082, "doc_norm": 1.4557, "encoder_q-embeddings": 1304.446, "encoder_q-layer.0": 841.9931, "encoder_q-layer.1": 922.7508, "encoder_q-layer.10": 1098.2759, "encoder_q-layer.11": 2868.895, "encoder_q-layer.2": 1048.3608, "encoder_q-layer.3": 1073.4365, "encoder_q-layer.4": 1100.6132, "encoder_q-layer.5": 1052.9346, "encoder_q-layer.6": 1075.4108, "encoder_q-layer.7": 1135.0856, "encoder_q-layer.8": 1271.6294, "encoder_q-layer.9": 1138.6891, "epoch": 0.62, "inbatch_neg_score": 0.2765, "inbatch_pos_score": 0.9297, "learning_rate": 2.027777777777778e-05, "loss": 3.4082, "norm_diff": 0.0994, "norm_loss": 0.0, "num_token_doc": 66.7587, "num_token_overlap": 14.6499, "num_token_query": 37.5124, "num_token_union": 65.4305, "num_word_context": 201.9076, "num_word_doc": 49.8379, "num_word_query": 28.0783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1968.2523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2764, "query_norm": 1.3562, "queue_k_norm": 1.4498, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5124, "sent_len_1": 66.7587, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.9013, "stdk": 0.0488, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4408, "doc_norm": 1.4401, "encoder_q-embeddings": 668.0305, "encoder_q-layer.0": 480.1409, "encoder_q-layer.1": 509.2155, "encoder_q-layer.10": 597.3612, "encoder_q-layer.11": 1430.7963, "encoder_q-layer.2": 576.4902, "encoder_q-layer.3": 567.2942, "encoder_q-layer.4": 605.7314, "encoder_q-layer.5": 608.4901, "encoder_q-layer.6": 607.9944, "encoder_q-layer.7": 570.6594, "encoder_q-layer.8": 627.954, "encoder_q-layer.9": 576.5579, "epoch": 0.62, "inbatch_neg_score": 0.2775, "inbatch_pos_score": 0.8833, "learning_rate": 2.0222222222222222e-05, "loss": 3.4408, "norm_diff": 0.1105, "norm_loss": 0.0, "num_token_doc": 67.0504, "num_token_overlap": 14.6166, "num_token_query": 37.3112, "num_token_union": 65.4641, "num_word_context": 202.8168, "num_word_doc": 50.0312, "num_word_query": 27.946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1046.5502, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2769, "query_norm": 1.3295, "queue_k_norm": 1.4509, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3112, "sent_len_1": 67.0504, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.08, "stdk": 0.0482, "stdq": 0.0436, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.4204, "doc_norm": 1.4489, "encoder_q-embeddings": 2298.6003, "encoder_q-layer.0": 1667.8524, "encoder_q-layer.1": 1840.197, "encoder_q-layer.10": 552.1208, "encoder_q-layer.11": 1450.9904, "encoder_q-layer.2": 2085.5645, "encoder_q-layer.3": 2189.3079, "encoder_q-layer.4": 2210.0378, "encoder_q-layer.5": 2126.5837, "encoder_q-layer.6": 1687.2875, "encoder_q-layer.7": 1334.2644, "encoder_q-layer.8": 859.9346, "encoder_q-layer.9": 559.0538, "epoch": 0.62, "inbatch_neg_score": 0.2744, "inbatch_pos_score": 0.9438, "learning_rate": 2.0166666666666668e-05, "loss": 3.4204, "norm_diff": 0.0919, "norm_loss": 0.0, "num_token_doc": 66.7641, "num_token_overlap": 14.5865, "num_token_query": 37.2733, "num_token_union": 65.3382, "num_word_context": 202.2269, "num_word_doc": 49.807, "num_word_query": 27.922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2620.5622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2737, "query_norm": 1.3569, "queue_k_norm": 1.4488, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2733, "sent_len_1": 66.7641, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5037, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4353, "doc_norm": 1.4425, "encoder_q-embeddings": 341.0317, "encoder_q-layer.0": 239.8352, "encoder_q-layer.1": 263.896, "encoder_q-layer.10": 372.9291, "encoder_q-layer.11": 808.2497, "encoder_q-layer.2": 303.8521, "encoder_q-layer.3": 310.6555, "encoder_q-layer.4": 327.9796, "encoder_q-layer.5": 309.5149, "encoder_q-layer.6": 320.6513, "encoder_q-layer.7": 341.4392, "encoder_q-layer.8": 354.711, "encoder_q-layer.9": 302.5692, "epoch": 0.62, "inbatch_neg_score": 0.274, "inbatch_pos_score": 0.9194, "learning_rate": 2.011111111111111e-05, "loss": 3.4353, "norm_diff": 0.088, "norm_loss": 0.0, "num_token_doc": 66.613, "num_token_overlap": 14.5436, "num_token_query": 37.0483, "num_token_union": 65.1409, "num_word_context": 202.025, "num_word_doc": 49.7232, "num_word_query": 27.7195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 558.6607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2747, "query_norm": 1.3544, "queue_k_norm": 1.4497, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0483, "sent_len_1": 66.613, "sent_len_max_0": 127.98, "sent_len_max_1": 189.535, "stdk": 0.0483, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.4201, "doc_norm": 1.4448, "encoder_q-embeddings": 346.9967, "encoder_q-layer.0": 270.9158, "encoder_q-layer.1": 305.4685, "encoder_q-layer.10": 332.969, "encoder_q-layer.11": 850.5612, "encoder_q-layer.2": 348.0786, "encoder_q-layer.3": 392.6112, "encoder_q-layer.4": 449.4798, "encoder_q-layer.5": 297.2224, "encoder_q-layer.6": 285.4476, "encoder_q-layer.7": 300.8816, "encoder_q-layer.8": 361.7876, "encoder_q-layer.9": 333.1171, "epoch": 0.62, "inbatch_neg_score": 0.2722, "inbatch_pos_score": 0.895, "learning_rate": 2.0055555555555556e-05, "loss": 3.4201, "norm_diff": 0.1023, "norm_loss": 0.0, "num_token_doc": 66.627, "num_token_overlap": 14.5457, "num_token_query": 37.2973, "num_token_union": 65.3729, "num_word_context": 202.5559, "num_word_doc": 49.7778, "num_word_query": 27.9093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 595.9761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2725, "query_norm": 1.3425, "queue_k_norm": 1.45, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2973, "sent_len_1": 66.627, "sent_len_max_0": 128.0, "sent_len_max_1": 187.225, "stdk": 0.0484, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.4041, "doc_norm": 1.4552, "encoder_q-embeddings": 375.9766, "encoder_q-layer.0": 252.2116, "encoder_q-layer.1": 285.756, "encoder_q-layer.10": 299.4861, "encoder_q-layer.11": 741.084, "encoder_q-layer.2": 365.7651, "encoder_q-layer.3": 366.5069, "encoder_q-layer.4": 379.7668, "encoder_q-layer.5": 357.9695, "encoder_q-layer.6": 305.7901, "encoder_q-layer.7": 280.034, "encoder_q-layer.8": 332.8636, "encoder_q-layer.9": 295.2101, "epoch": 0.62, "inbatch_neg_score": 0.2689, "inbatch_pos_score": 0.918, "learning_rate": 2e-05, "loss": 3.4041, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.7797, "num_token_overlap": 14.5955, "num_token_query": 37.4617, "num_token_union": 65.3934, "num_word_context": 202.2963, "num_word_doc": 49.8005, "num_word_query": 28.0597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 571.313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.269, "query_norm": 1.3411, "queue_k_norm": 1.4484, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4617, "sent_len_1": 66.7797, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.785, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.4343, "doc_norm": 1.4494, "encoder_q-embeddings": 367.3302, "encoder_q-layer.0": 256.2885, "encoder_q-layer.1": 264.7817, "encoder_q-layer.10": 287.1288, "encoder_q-layer.11": 747.8739, "encoder_q-layer.2": 313.562, "encoder_q-layer.3": 344.2683, "encoder_q-layer.4": 380.2072, "encoder_q-layer.5": 358.9106, "encoder_q-layer.6": 353.1784, "encoder_q-layer.7": 314.1892, "encoder_q-layer.8": 321.6344, "encoder_q-layer.9": 285.5406, "epoch": 0.63, "inbatch_neg_score": 0.2659, "inbatch_pos_score": 0.9014, "learning_rate": 1.9944444444444447e-05, "loss": 3.4343, "norm_diff": 0.1115, "norm_loss": 0.0, "num_token_doc": 66.8959, "num_token_overlap": 14.4968, "num_token_query": 37.1259, "num_token_union": 65.4422, "num_word_context": 202.3738, "num_word_doc": 49.9228, "num_word_query": 27.758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 573.1, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2666, "query_norm": 1.3378, "queue_k_norm": 1.4492, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1259, "sent_len_1": 66.8959, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.9187, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4259, "doc_norm": 1.4528, "encoder_q-embeddings": 306.774, "encoder_q-layer.0": 208.6326, "encoder_q-layer.1": 228.6079, "encoder_q-layer.10": 269.4487, "encoder_q-layer.11": 761.3242, "encoder_q-layer.2": 262.2703, "encoder_q-layer.3": 274.0946, "encoder_q-layer.4": 295.9617, "encoder_q-layer.5": 318.0343, "encoder_q-layer.6": 345.5337, "encoder_q-layer.7": 316.7446, "encoder_q-layer.8": 325.3606, "encoder_q-layer.9": 275.7198, "epoch": 0.63, "inbatch_neg_score": 0.2679, "inbatch_pos_score": 0.9053, "learning_rate": 1.988888888888889e-05, "loss": 3.4259, "norm_diff": 0.1222, "norm_loss": 0.0, "num_token_doc": 66.614, "num_token_overlap": 14.5703, "num_token_query": 37.2903, "num_token_union": 65.2804, "num_word_context": 202.0955, "num_word_doc": 49.7086, "num_word_query": 27.9291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 522.2052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2686, "query_norm": 1.3306, "queue_k_norm": 1.4497, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2903, "sent_len_1": 66.614, "sent_len_max_0": 127.995, "sent_len_max_1": 190.3525, "stdk": 0.0488, "stdq": 0.0436, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.4165, "doc_norm": 1.4514, "encoder_q-embeddings": 292.6512, "encoder_q-layer.0": 195.7035, "encoder_q-layer.1": 218.2821, "encoder_q-layer.10": 266.2134, "encoder_q-layer.11": 688.2724, "encoder_q-layer.2": 249.4548, "encoder_q-layer.3": 242.0942, "encoder_q-layer.4": 242.1772, "encoder_q-layer.5": 257.3155, "encoder_q-layer.6": 288.5191, "encoder_q-layer.7": 284.0914, "encoder_q-layer.8": 298.1618, "encoder_q-layer.9": 264.9237, "epoch": 0.63, "inbatch_neg_score": 0.2618, "inbatch_pos_score": 0.917, "learning_rate": 1.9833333333333335e-05, "loss": 3.4165, "norm_diff": 0.1205, "norm_loss": 0.0, "num_token_doc": 66.7648, "num_token_overlap": 14.6348, "num_token_query": 37.323, "num_token_union": 65.3323, "num_word_context": 202.1384, "num_word_doc": 49.859, "num_word_query": 27.9563, "postclip_grad_norm": 1.0, "preclip_grad_norm": 475.9293, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2622, "query_norm": 1.3309, "queue_k_norm": 1.4479, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.323, "sent_len_1": 66.7648, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5387, "stdk": 0.0487, "stdq": 0.0438, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4179, "doc_norm": 1.4461, "encoder_q-embeddings": 684.4109, "encoder_q-layer.0": 494.2726, "encoder_q-layer.1": 597.9166, "encoder_q-layer.10": 266.1609, "encoder_q-layer.11": 719.5629, "encoder_q-layer.2": 659.6609, "encoder_q-layer.3": 683.1074, "encoder_q-layer.4": 584.4361, "encoder_q-layer.5": 522.775, "encoder_q-layer.6": 450.3797, "encoder_q-layer.7": 448.1736, "encoder_q-layer.8": 335.4387, "encoder_q-layer.9": 276.6171, "epoch": 0.63, "inbatch_neg_score": 0.261, "inbatch_pos_score": 0.8975, "learning_rate": 1.9777777777777778e-05, "loss": 3.4179, "norm_diff": 0.1115, "norm_loss": 0.0, "num_token_doc": 66.9573, "num_token_overlap": 14.6499, "num_token_query": 37.3519, "num_token_union": 65.445, "num_word_context": 202.8663, "num_word_doc": 49.9941, "num_word_query": 27.9582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 831.7902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2603, "query_norm": 1.3346, "queue_k_norm": 1.4473, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3519, "sent_len_1": 66.9573, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2912, "stdk": 0.0485, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3972, "doc_norm": 1.4479, "encoder_q-embeddings": 629.4432, "encoder_q-layer.0": 448.0303, "encoder_q-layer.1": 489.1456, "encoder_q-layer.10": 283.6399, "encoder_q-layer.11": 718.4335, "encoder_q-layer.2": 564.5836, "encoder_q-layer.3": 559.1536, "encoder_q-layer.4": 518.3262, "encoder_q-layer.5": 519.1544, "encoder_q-layer.6": 468.8471, "encoder_q-layer.7": 504.9923, "encoder_q-layer.8": 442.3861, "encoder_q-layer.9": 301.8637, "epoch": 0.63, "inbatch_neg_score": 0.2555, "inbatch_pos_score": 0.9033, "learning_rate": 1.9722222222222224e-05, "loss": 3.3972, "norm_diff": 0.1013, "norm_loss": 0.0, "num_token_doc": 66.7815, "num_token_overlap": 14.6172, "num_token_query": 37.3412, "num_token_union": 65.3546, "num_word_context": 202.4249, "num_word_doc": 49.8811, "num_word_query": 27.9821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 783.5651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2561, "query_norm": 1.3466, "queue_k_norm": 1.4476, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3412, "sent_len_1": 66.7815, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.2625, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.4212, "doc_norm": 1.4397, "encoder_q-embeddings": 10184.2578, "encoder_q-layer.0": 7421.0381, "encoder_q-layer.1": 8838.2656, "encoder_q-layer.10": 297.5576, "encoder_q-layer.11": 702.5813, "encoder_q-layer.2": 9784.8018, "encoder_q-layer.3": 9884.7646, "encoder_q-layer.4": 10132.2666, "encoder_q-layer.5": 9608.2686, "encoder_q-layer.6": 8109.1333, "encoder_q-layer.7": 5994.4453, "encoder_q-layer.8": 3131.7363, "encoder_q-layer.9": 774.806, "epoch": 0.63, "inbatch_neg_score": 0.2558, "inbatch_pos_score": 0.8745, "learning_rate": 1.9666666666666666e-05, "loss": 3.4212, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 66.9448, "num_token_overlap": 14.6642, "num_token_query": 37.4991, "num_token_union": 65.5171, "num_word_context": 202.4009, "num_word_doc": 49.9611, "num_word_query": 28.082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11781.4641, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2566, "query_norm": 1.3189, "queue_k_norm": 1.4481, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4991, "sent_len_1": 66.9448, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8162, "stdk": 0.0484, "stdq": 0.0435, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.4314, "doc_norm": 1.4411, "encoder_q-embeddings": 244.3029, "encoder_q-layer.0": 155.9031, "encoder_q-layer.1": 163.2101, "encoder_q-layer.10": 319.6964, "encoder_q-layer.11": 791.9659, "encoder_q-layer.2": 186.9196, "encoder_q-layer.3": 194.4421, "encoder_q-layer.4": 204.442, "encoder_q-layer.5": 209.3131, "encoder_q-layer.6": 244.0113, "encoder_q-layer.7": 259.6874, "encoder_q-layer.8": 328.3156, "encoder_q-layer.9": 299.6267, "epoch": 0.63, "inbatch_neg_score": 0.2546, "inbatch_pos_score": 0.9165, "learning_rate": 1.9611111111111115e-05, "loss": 3.4314, "norm_diff": 0.1011, "norm_loss": 0.0, "num_token_doc": 66.8438, "num_token_overlap": 14.5334, "num_token_query": 37.3639, "num_token_union": 65.5055, "num_word_context": 202.5164, "num_word_doc": 49.863, "num_word_query": 27.9718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 483.4847, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2546, "query_norm": 1.34, "queue_k_norm": 1.4459, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3639, "sent_len_1": 66.8438, "sent_len_max_0": 128.0, "sent_len_max_1": 189.19, "stdk": 0.0484, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.41, "doc_norm": 1.4501, "encoder_q-embeddings": 1087.4658, "encoder_q-layer.0": 743.9053, "encoder_q-layer.1": 868.1132, "encoder_q-layer.10": 278.782, "encoder_q-layer.11": 735.52, "encoder_q-layer.2": 1062.8896, "encoder_q-layer.3": 1010.1531, "encoder_q-layer.4": 971.4688, "encoder_q-layer.5": 920.1771, "encoder_q-layer.6": 1015.3731, "encoder_q-layer.7": 1147.5895, "encoder_q-layer.8": 684.4501, "encoder_q-layer.9": 296.7558, "epoch": 0.63, "inbatch_neg_score": 0.2532, "inbatch_pos_score": 0.8911, "learning_rate": 1.9555555555555557e-05, "loss": 3.41, "norm_diff": 0.1216, "norm_loss": 0.0, "num_token_doc": 66.8598, "num_token_overlap": 14.5832, "num_token_query": 37.3107, "num_token_union": 65.3807, "num_word_context": 202.2724, "num_word_doc": 49.8883, "num_word_query": 27.9371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1340.9242, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2534, "query_norm": 1.3285, "queue_k_norm": 1.4453, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3107, "sent_len_1": 66.8598, "sent_len_max_0": 127.9875, "sent_len_max_1": 191.535, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.417, "doc_norm": 1.4466, "encoder_q-embeddings": 255.7005, "encoder_q-layer.0": 176.2778, "encoder_q-layer.1": 188.3364, "encoder_q-layer.10": 287.5157, "encoder_q-layer.11": 737.0059, "encoder_q-layer.2": 209.2171, "encoder_q-layer.3": 207.1346, "encoder_q-layer.4": 218.307, "encoder_q-layer.5": 232.498, "encoder_q-layer.6": 247.4992, "encoder_q-layer.7": 276.361, "encoder_q-layer.8": 318.9969, "encoder_q-layer.9": 278.6501, "epoch": 0.63, "inbatch_neg_score": 0.2548, "inbatch_pos_score": 0.9067, "learning_rate": 1.9500000000000003e-05, "loss": 3.417, "norm_diff": 0.1044, "norm_loss": 0.0, "num_token_doc": 66.6546, "num_token_overlap": 14.602, "num_token_query": 37.1743, "num_token_union": 65.1931, "num_word_context": 202.3041, "num_word_doc": 49.7072, "num_word_query": 27.8114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 474.1418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.3422, "queue_k_norm": 1.4462, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1743, "sent_len_1": 66.6546, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.465, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4298, "doc_norm": 1.4438, "encoder_q-embeddings": 276.7187, "encoder_q-layer.0": 183.0852, "encoder_q-layer.1": 202.8692, "encoder_q-layer.10": 311.5195, "encoder_q-layer.11": 738.4504, "encoder_q-layer.2": 223.3997, "encoder_q-layer.3": 231.6307, "encoder_q-layer.4": 237.3743, "encoder_q-layer.5": 229.4656, "encoder_q-layer.6": 249.3632, "encoder_q-layer.7": 266.8995, "encoder_q-layer.8": 321.4784, "encoder_q-layer.9": 288.5759, "epoch": 0.63, "inbatch_neg_score": 0.255, "inbatch_pos_score": 0.9131, "learning_rate": 1.9444444444444445e-05, "loss": 3.4298, "norm_diff": 0.0992, "norm_loss": 0.0, "num_token_doc": 66.8396, "num_token_overlap": 14.5161, "num_token_query": 37.2834, "num_token_union": 65.4667, "num_word_context": 202.5753, "num_word_doc": 49.8906, "num_word_query": 27.9233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 480.0249, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2542, "query_norm": 1.3446, "queue_k_norm": 1.4463, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2834, "sent_len_1": 66.8396, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1175, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.4288, "doc_norm": 1.4465, "encoder_q-embeddings": 291.1425, "encoder_q-layer.0": 195.4785, "encoder_q-layer.1": 195.9321, "encoder_q-layer.10": 329.5494, "encoder_q-layer.11": 824.928, "encoder_q-layer.2": 217.8214, "encoder_q-layer.3": 230.2425, "encoder_q-layer.4": 255.35, "encoder_q-layer.5": 249.8807, "encoder_q-layer.6": 279.5026, "encoder_q-layer.7": 294.4296, "encoder_q-layer.8": 337.8111, "encoder_q-layer.9": 299.1542, "epoch": 0.64, "inbatch_neg_score": 0.2532, "inbatch_pos_score": 0.8892, "learning_rate": 1.938888888888889e-05, "loss": 3.4288, "norm_diff": 0.1004, "norm_loss": 0.0, "num_token_doc": 66.8308, "num_token_overlap": 14.588, "num_token_query": 37.3097, "num_token_union": 65.4393, "num_word_context": 202.3553, "num_word_doc": 49.8951, "num_word_query": 27.9467, "postclip_grad_norm": 1.0, "preclip_grad_norm": 527.8907, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2542, "query_norm": 1.3461, "queue_k_norm": 1.4445, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3097, "sent_len_1": 66.8308, "sent_len_max_0": 127.99, "sent_len_max_1": 188.2537, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4135, "doc_norm": 1.4484, "encoder_q-embeddings": 393.5125, "encoder_q-layer.0": 266.5975, "encoder_q-layer.1": 284.6198, "encoder_q-layer.10": 280.966, "encoder_q-layer.11": 717.4891, "encoder_q-layer.2": 306.9211, "encoder_q-layer.3": 331.983, "encoder_q-layer.4": 367.7576, "encoder_q-layer.5": 365.6345, "encoder_q-layer.6": 324.9727, "encoder_q-layer.7": 295.497, "encoder_q-layer.8": 320.2491, "encoder_q-layer.9": 276.5583, "epoch": 0.64, "inbatch_neg_score": 0.2539, "inbatch_pos_score": 0.9023, "learning_rate": 1.9333333333333333e-05, "loss": 3.4135, "norm_diff": 0.109, "norm_loss": 0.0, "num_token_doc": 66.9217, "num_token_overlap": 14.5925, "num_token_query": 37.3848, "num_token_union": 65.4964, "num_word_context": 202.6159, "num_word_doc": 49.9388, "num_word_query": 28.0004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 558.7921, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2539, "query_norm": 1.3393, "queue_k_norm": 1.4444, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3848, "sent_len_1": 66.9217, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6488, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4469, "doc_norm": 1.447, "encoder_q-embeddings": 1125.4121, "encoder_q-layer.0": 894.611, "encoder_q-layer.1": 956.4935, "encoder_q-layer.10": 298.0121, "encoder_q-layer.11": 736.7395, "encoder_q-layer.2": 1040.1935, "encoder_q-layer.3": 1153.0771, "encoder_q-layer.4": 1014.98, "encoder_q-layer.5": 998.1263, "encoder_q-layer.6": 788.6097, "encoder_q-layer.7": 692.0341, "encoder_q-layer.8": 586.5159, "encoder_q-layer.9": 312.2207, "epoch": 0.64, "inbatch_neg_score": 0.2544, "inbatch_pos_score": 0.8965, "learning_rate": 1.927777777777778e-05, "loss": 3.4469, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.6422, "num_token_overlap": 14.5568, "num_token_query": 37.2305, "num_token_union": 65.2335, "num_word_context": 201.9878, "num_word_doc": 49.6954, "num_word_query": 27.852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1387.0114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2551, "query_norm": 1.332, "queue_k_norm": 1.443, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2305, "sent_len_1": 66.6422, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2625, "stdk": 0.0488, "stdq": 0.0438, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.3995, "doc_norm": 1.4392, "encoder_q-embeddings": 334.878, "encoder_q-layer.0": 240.085, "encoder_q-layer.1": 256.766, "encoder_q-layer.10": 343.6956, "encoder_q-layer.11": 744.2644, "encoder_q-layer.2": 318.4992, "encoder_q-layer.3": 312.7264, "encoder_q-layer.4": 313.4797, "encoder_q-layer.5": 292.4384, "encoder_q-layer.6": 280.5224, "encoder_q-layer.7": 285.3309, "encoder_q-layer.8": 349.0203, "encoder_q-layer.9": 294.6062, "epoch": 0.64, "inbatch_neg_score": 0.2499, "inbatch_pos_score": 0.8862, "learning_rate": 1.922222222222222e-05, "loss": 3.3995, "norm_diff": 0.1054, "norm_loss": 0.0, "num_token_doc": 66.9763, "num_token_overlap": 14.6824, "num_token_query": 37.5145, "num_token_union": 65.5224, "num_word_context": 202.5198, "num_word_doc": 49.9892, "num_word_query": 28.1122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 540.0091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2505, "query_norm": 1.3338, "queue_k_norm": 1.4437, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5145, "sent_len_1": 66.9763, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4725, "stdk": 0.0485, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.4011, "doc_norm": 1.4472, "encoder_q-embeddings": 941.4971, "encoder_q-layer.0": 737.8149, "encoder_q-layer.1": 799.8299, "encoder_q-layer.10": 293.7719, "encoder_q-layer.11": 742.5835, "encoder_q-layer.2": 917.8643, "encoder_q-layer.3": 953.9845, "encoder_q-layer.4": 910.5667, "encoder_q-layer.5": 926.7767, "encoder_q-layer.6": 736.4218, "encoder_q-layer.7": 536.0634, "encoder_q-layer.8": 424.4497, "encoder_q-layer.9": 290.4193, "epoch": 0.64, "inbatch_neg_score": 0.2521, "inbatch_pos_score": 0.9214, "learning_rate": 1.9166666666666667e-05, "loss": 3.4011, "norm_diff": 0.095, "norm_loss": 0.0, "num_token_doc": 66.8452, "num_token_overlap": 14.5846, "num_token_query": 37.2481, "num_token_union": 65.3961, "num_word_context": 202.2297, "num_word_doc": 49.8639, "num_word_query": 27.8818, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1137.201, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2517, "query_norm": 1.3522, "queue_k_norm": 1.4434, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2481, "sent_len_1": 66.8452, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.7188, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3998, "doc_norm": 1.4478, "encoder_q-embeddings": 267.0082, "encoder_q-layer.0": 187.8845, "encoder_q-layer.1": 195.66, "encoder_q-layer.10": 281.7128, "encoder_q-layer.11": 701.3875, "encoder_q-layer.2": 228.1348, "encoder_q-layer.3": 231.1106, "encoder_q-layer.4": 243.7829, "encoder_q-layer.5": 245.4547, "encoder_q-layer.6": 261.2596, "encoder_q-layer.7": 269.9737, "encoder_q-layer.8": 308.9517, "encoder_q-layer.9": 277.0559, "epoch": 0.64, "inbatch_neg_score": 0.2522, "inbatch_pos_score": 0.8975, "learning_rate": 1.9111111111111113e-05, "loss": 3.3998, "norm_diff": 0.1078, "norm_loss": 0.0, "num_token_doc": 66.8294, "num_token_overlap": 14.6348, "num_token_query": 37.3752, "num_token_union": 65.4272, "num_word_context": 202.423, "num_word_doc": 49.9134, "num_word_query": 27.9999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 474.3753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2524, "query_norm": 1.34, "queue_k_norm": 1.4416, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3752, "sent_len_1": 66.8294, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.7612, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.4072, "doc_norm": 1.4428, "encoder_q-embeddings": 255.4353, "encoder_q-layer.0": 170.2873, "encoder_q-layer.1": 180.0068, "encoder_q-layer.10": 303.3682, "encoder_q-layer.11": 751.389, "encoder_q-layer.2": 201.5531, "encoder_q-layer.3": 211.7505, "encoder_q-layer.4": 209.3689, "encoder_q-layer.5": 216.3603, "encoder_q-layer.6": 234.4136, "encoder_q-layer.7": 245.7563, "encoder_q-layer.8": 314.1088, "encoder_q-layer.9": 299.4984, "epoch": 0.64, "inbatch_neg_score": 0.2565, "inbatch_pos_score": 0.8916, "learning_rate": 1.905555555555556e-05, "loss": 3.4072, "norm_diff": 0.096, "norm_loss": 0.0, "num_token_doc": 66.7846, "num_token_overlap": 14.5842, "num_token_query": 37.2198, "num_token_union": 65.3283, "num_word_context": 202.003, "num_word_doc": 49.8241, "num_word_query": 27.8686, "postclip_grad_norm": 1.0, "preclip_grad_norm": 464.1447, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2546, "query_norm": 1.3468, "queue_k_norm": 1.4414, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2198, "sent_len_1": 66.7846, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.6463, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.4094, "doc_norm": 1.4486, "encoder_q-embeddings": 2739.0256, "encoder_q-layer.0": 1987.1355, "encoder_q-layer.1": 2402.3748, "encoder_q-layer.10": 551.0398, "encoder_q-layer.11": 1387.8319, "encoder_q-layer.2": 3018.0862, "encoder_q-layer.3": 2784.4229, "encoder_q-layer.4": 2653.6919, "encoder_q-layer.5": 2103.3225, "encoder_q-layer.6": 1678.2894, "encoder_q-layer.7": 1374.5133, "encoder_q-layer.8": 1134.7037, "encoder_q-layer.9": 626.6946, "epoch": 0.64, "inbatch_neg_score": 0.2532, "inbatch_pos_score": 0.9258, "learning_rate": 1.9e-05, "loss": 3.4094, "norm_diff": 0.0996, "norm_loss": 0.0, "num_token_doc": 66.7249, "num_token_overlap": 14.6082, "num_token_query": 37.36, "num_token_union": 65.3553, "num_word_context": 202.304, "num_word_doc": 49.7867, "num_word_query": 27.9821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3178.4203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2529, "query_norm": 1.349, "queue_k_norm": 1.439, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.36, "sent_len_1": 66.7249, "sent_len_max_0": 128.0, "sent_len_max_1": 192.4225, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.4141, "doc_norm": 1.4485, "encoder_q-embeddings": 614.3406, "encoder_q-layer.0": 414.9088, "encoder_q-layer.1": 449.6089, "encoder_q-layer.10": 623.1777, "encoder_q-layer.11": 1445.7092, "encoder_q-layer.2": 529.5171, "encoder_q-layer.3": 549.5262, "encoder_q-layer.4": 584.8306, "encoder_q-layer.5": 532.2597, "encoder_q-layer.6": 547.3627, "encoder_q-layer.7": 576.5555, "encoder_q-layer.8": 615.3695, "encoder_q-layer.9": 555.6915, "epoch": 0.64, "inbatch_neg_score": 0.2516, "inbatch_pos_score": 0.9028, "learning_rate": 1.8944444444444447e-05, "loss": 3.4141, "norm_diff": 0.1158, "norm_loss": 0.0, "num_token_doc": 66.7653, "num_token_overlap": 14.611, "num_token_query": 37.4764, "num_token_union": 65.4546, "num_word_context": 202.254, "num_word_doc": 49.8033, "num_word_query": 28.064, "postclip_grad_norm": 1.0, "preclip_grad_norm": 978.9394, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2515, "query_norm": 1.3327, "queue_k_norm": 1.4414, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4764, "sent_len_1": 66.7653, "sent_len_max_0": 127.985, "sent_len_max_1": 186.9512, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4011, "doc_norm": 1.4463, "encoder_q-embeddings": 494.8769, "encoder_q-layer.0": 328.7629, "encoder_q-layer.1": 348.9976, "encoder_q-layer.10": 613.8409, "encoder_q-layer.11": 1443.0574, "encoder_q-layer.2": 379.787, "encoder_q-layer.3": 378.7912, "encoder_q-layer.4": 411.3177, "encoder_q-layer.5": 389.5526, "encoder_q-layer.6": 443.4702, "encoder_q-layer.7": 491.8686, "encoder_q-layer.8": 583.1581, "encoder_q-layer.9": 550.4837, "epoch": 0.64, "inbatch_neg_score": 0.2552, "inbatch_pos_score": 0.8975, "learning_rate": 1.888888888888889e-05, "loss": 3.4011, "norm_diff": 0.1007, "norm_loss": 0.0, "num_token_doc": 66.8559, "num_token_overlap": 14.6297, "num_token_query": 37.4231, "num_token_union": 65.4065, "num_word_context": 202.3066, "num_word_doc": 49.8855, "num_word_query": 28.0335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 912.5065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2544, "query_norm": 1.3456, "queue_k_norm": 1.4419, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4231, "sent_len_1": 66.8559, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9563, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.4095, "doc_norm": 1.4375, "encoder_q-embeddings": 3119.8647, "encoder_q-layer.0": 2264.4697, "encoder_q-layer.1": 2464.9241, "encoder_q-layer.10": 646.4483, "encoder_q-layer.11": 1447.1593, "encoder_q-layer.2": 2788.2676, "encoder_q-layer.3": 2991.4785, "encoder_q-layer.4": 2955.3572, "encoder_q-layer.5": 2261.8496, "encoder_q-layer.6": 1805.0111, "encoder_q-layer.7": 1197.0034, "encoder_q-layer.8": 997.3159, "encoder_q-layer.9": 612.2484, "epoch": 0.65, "inbatch_neg_score": 0.2543, "inbatch_pos_score": 0.9062, "learning_rate": 1.8833333333333335e-05, "loss": 3.4095, "norm_diff": 0.0864, "norm_loss": 0.0, "num_token_doc": 66.7575, "num_token_overlap": 14.5288, "num_token_query": 37.2155, "num_token_union": 65.349, "num_word_context": 201.7299, "num_word_doc": 49.7955, "num_word_query": 27.8498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3453.7052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2527, "query_norm": 1.3511, "queue_k_norm": 1.4419, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2155, "sent_len_1": 66.7575, "sent_len_max_0": 128.0, "sent_len_max_1": 190.36, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.4251, "doc_norm": 1.4414, "encoder_q-embeddings": 532.1248, "encoder_q-layer.0": 358.1042, "encoder_q-layer.1": 370.3066, "encoder_q-layer.10": 608.8085, "encoder_q-layer.11": 1449.7327, "encoder_q-layer.2": 420.6088, "encoder_q-layer.3": 439.5822, "encoder_q-layer.4": 479.3747, "encoder_q-layer.5": 459.9575, "encoder_q-layer.6": 514.6148, "encoder_q-layer.7": 541.7134, "encoder_q-layer.8": 630.2073, "encoder_q-layer.9": 572.0443, "epoch": 0.65, "inbatch_neg_score": 0.2503, "inbatch_pos_score": 0.8853, "learning_rate": 1.8777777777777777e-05, "loss": 3.4251, "norm_diff": 0.0973, "norm_loss": 0.0, "num_token_doc": 66.8064, "num_token_overlap": 14.5705, "num_token_query": 37.1818, "num_token_union": 65.2816, "num_word_context": 202.3127, "num_word_doc": 49.8344, "num_word_query": 27.8342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 940.9436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.25, "query_norm": 1.3441, "queue_k_norm": 1.4411, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1818, "sent_len_1": 66.8064, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4038, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.4117, "doc_norm": 1.4387, "encoder_q-embeddings": 666.7643, "encoder_q-layer.0": 426.9575, "encoder_q-layer.1": 443.477, "encoder_q-layer.10": 543.4014, "encoder_q-layer.11": 1415.5634, "encoder_q-layer.2": 448.407, "encoder_q-layer.3": 443.572, "encoder_q-layer.4": 434.4545, "encoder_q-layer.5": 430.9904, "encoder_q-layer.6": 473.4985, "encoder_q-layer.7": 556.4543, "encoder_q-layer.8": 631.1478, "encoder_q-layer.9": 549.4296, "epoch": 0.65, "inbatch_neg_score": 0.2581, "inbatch_pos_score": 0.8843, "learning_rate": 1.8722222222222223e-05, "loss": 3.4117, "norm_diff": 0.0964, "norm_loss": 0.0, "num_token_doc": 66.9612, "num_token_overlap": 14.578, "num_token_query": 37.2812, "num_token_union": 65.4531, "num_word_context": 202.6445, "num_word_doc": 49.9418, "num_word_query": 27.9332, "postclip_grad_norm": 1.0, "preclip_grad_norm": 967.5178, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2563, "query_norm": 1.3423, "queue_k_norm": 1.442, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2812, "sent_len_1": 66.9612, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8288, "stdk": 0.0485, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.4117, "doc_norm": 1.4421, "encoder_q-embeddings": 616.7629, "encoder_q-layer.0": 423.8615, "encoder_q-layer.1": 438.1974, "encoder_q-layer.10": 565.8251, "encoder_q-layer.11": 1433.2242, "encoder_q-layer.2": 508.4707, "encoder_q-layer.3": 492.2627, "encoder_q-layer.4": 536.147, "encoder_q-layer.5": 479.5116, "encoder_q-layer.6": 530.0406, "encoder_q-layer.7": 544.8685, "encoder_q-layer.8": 621.9632, "encoder_q-layer.9": 556.8898, "epoch": 0.65, "inbatch_neg_score": 0.2568, "inbatch_pos_score": 0.877, "learning_rate": 1.866666666666667e-05, "loss": 3.4117, "norm_diff": 0.1033, "norm_loss": 0.0, "num_token_doc": 66.7503, "num_token_overlap": 14.5649, "num_token_query": 37.3716, "num_token_union": 65.3908, "num_word_context": 202.4576, "num_word_doc": 49.7846, "num_word_query": 27.9596, "postclip_grad_norm": 1.0, "preclip_grad_norm": 979.6432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2556, "query_norm": 1.3388, "queue_k_norm": 1.4405, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3716, "sent_len_1": 66.7503, "sent_len_max_0": 127.9925, "sent_len_max_1": 192.3, "stdk": 0.0486, "stdq": 0.0437, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.417, "doc_norm": 1.4423, "encoder_q-embeddings": 18300.0293, "encoder_q-layer.0": 12901.5225, "encoder_q-layer.1": 12044.1807, "encoder_q-layer.10": 699.871, "encoder_q-layer.11": 1446.8317, "encoder_q-layer.2": 13215.6328, "encoder_q-layer.3": 13428.5107, "encoder_q-layer.4": 13856.3555, "encoder_q-layer.5": 11177.8838, "encoder_q-layer.6": 8670.8447, "encoder_q-layer.7": 6835.8398, "encoder_q-layer.8": 5589.0537, "encoder_q-layer.9": 1536.5743, "epoch": 0.65, "inbatch_neg_score": 0.2585, "inbatch_pos_score": 0.8779, "learning_rate": 1.861111111111111e-05, "loss": 3.417, "norm_diff": 0.0957, "norm_loss": 0.0, "num_token_doc": 66.9059, "num_token_overlap": 14.6246, "num_token_query": 37.3234, "num_token_union": 65.4239, "num_word_context": 202.2501, "num_word_doc": 49.9362, "num_word_query": 27.9284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17035.1628, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.259, "query_norm": 1.3466, "queue_k_norm": 1.4412, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3234, "sent_len_1": 66.9059, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8212, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4041, "doc_norm": 1.4417, "encoder_q-embeddings": 1820.1714, "encoder_q-layer.0": 1408.5724, "encoder_q-layer.1": 1552.7976, "encoder_q-layer.10": 567.5248, "encoder_q-layer.11": 1414.8696, "encoder_q-layer.2": 1780.5415, "encoder_q-layer.3": 1816.9547, "encoder_q-layer.4": 1997.8254, "encoder_q-layer.5": 1636.7299, "encoder_q-layer.6": 1506.9403, "encoder_q-layer.7": 1343.6093, "encoder_q-layer.8": 1207.5034, "encoder_q-layer.9": 627.0627, "epoch": 0.65, "inbatch_neg_score": 0.2598, "inbatch_pos_score": 0.8984, "learning_rate": 1.8555555555555557e-05, "loss": 3.4041, "norm_diff": 0.0914, "norm_loss": 0.0, "num_token_doc": 66.817, "num_token_overlap": 14.6061, "num_token_query": 37.3041, "num_token_union": 65.306, "num_word_context": 202.286, "num_word_doc": 49.8379, "num_word_query": 27.9256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2256.3785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2593, "query_norm": 1.3503, "queue_k_norm": 1.4419, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3041, "sent_len_1": 66.817, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1625, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.4119, "doc_norm": 1.441, "encoder_q-embeddings": 926.962, "encoder_q-layer.0": 637.4877, "encoder_q-layer.1": 665.0594, "encoder_q-layer.10": 560.3119, "encoder_q-layer.11": 1382.476, "encoder_q-layer.2": 726.1806, "encoder_q-layer.3": 727.704, "encoder_q-layer.4": 643.4422, "encoder_q-layer.5": 602.6953, "encoder_q-layer.6": 654.4921, "encoder_q-layer.7": 665.917, "encoder_q-layer.8": 700.556, "encoder_q-layer.9": 578.5164, "epoch": 0.65, "inbatch_neg_score": 0.2655, "inbatch_pos_score": 0.8882, "learning_rate": 1.85e-05, "loss": 3.4119, "norm_diff": 0.0903, "norm_loss": 0.0, "num_token_doc": 66.7047, "num_token_overlap": 14.5614, "num_token_query": 37.3705, "num_token_union": 65.374, "num_word_context": 202.1719, "num_word_doc": 49.7662, "num_word_query": 27.9971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1153.5684, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.3507, "queue_k_norm": 1.4429, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3705, "sent_len_1": 66.7047, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.7575, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.4079, "doc_norm": 1.4411, "encoder_q-embeddings": 747.0513, "encoder_q-layer.0": 506.4403, "encoder_q-layer.1": 508.9442, "encoder_q-layer.10": 552.2649, "encoder_q-layer.11": 1419.4574, "encoder_q-layer.2": 564.0286, "encoder_q-layer.3": 565.8577, "encoder_q-layer.4": 573.4102, "encoder_q-layer.5": 526.9235, "encoder_q-layer.6": 561.1605, "encoder_q-layer.7": 580.7177, "encoder_q-layer.8": 628.2183, "encoder_q-layer.9": 547.4595, "epoch": 0.65, "inbatch_neg_score": 0.266, "inbatch_pos_score": 0.9194, "learning_rate": 1.8444444444444445e-05, "loss": 3.4079, "norm_diff": 0.0782, "norm_loss": 0.0, "num_token_doc": 67.0054, "num_token_overlap": 14.5996, "num_token_query": 37.2497, "num_token_union": 65.4392, "num_word_context": 202.8276, "num_word_doc": 49.9727, "num_word_query": 27.8811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1042.3141, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2649, "query_norm": 1.363, "queue_k_norm": 1.4445, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2497, "sent_len_1": 67.0054, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.4975, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.4121, "doc_norm": 1.4416, "encoder_q-embeddings": 522.2548, "encoder_q-layer.0": 352.6014, "encoder_q-layer.1": 354.2247, "encoder_q-layer.10": 550.8195, "encoder_q-layer.11": 1411.0239, "encoder_q-layer.2": 394.8448, "encoder_q-layer.3": 408.0609, "encoder_q-layer.4": 415.9982, "encoder_q-layer.5": 415.9102, "encoder_q-layer.6": 443.0245, "encoder_q-layer.7": 497.7525, "encoder_q-layer.8": 589.7072, "encoder_q-layer.9": 542.3401, "epoch": 0.65, "inbatch_neg_score": 0.2741, "inbatch_pos_score": 0.9072, "learning_rate": 1.838888888888889e-05, "loss": 3.4121, "norm_diff": 0.084, "norm_loss": 0.0, "num_token_doc": 66.9044, "num_token_overlap": 14.58, "num_token_query": 37.3382, "num_token_union": 65.4482, "num_word_context": 202.4565, "num_word_doc": 49.9411, "num_word_query": 27.9901, "postclip_grad_norm": 1.0, "preclip_grad_norm": 910.9059, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.272, "query_norm": 1.3576, "queue_k_norm": 1.4428, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3382, "sent_len_1": 66.9044, "sent_len_max_0": 128.0, "sent_len_max_1": 190.155, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.4031, "doc_norm": 1.4445, "encoder_q-embeddings": 7884.041, "encoder_q-layer.0": 5488.8774, "encoder_q-layer.1": 6355.7251, "encoder_q-layer.10": 581.8015, "encoder_q-layer.11": 1471.212, "encoder_q-layer.2": 7706.9893, "encoder_q-layer.3": 6316.7036, "encoder_q-layer.4": 5988.6372, "encoder_q-layer.5": 4846.6968, "encoder_q-layer.6": 3763.8455, "encoder_q-layer.7": 2805.9714, "encoder_q-layer.8": 2361.8298, "encoder_q-layer.9": 721.0131, "epoch": 0.65, "inbatch_neg_score": 0.2743, "inbatch_pos_score": 0.9058, "learning_rate": 1.8333333333333333e-05, "loss": 3.4031, "norm_diff": 0.074, "norm_loss": 0.0, "num_token_doc": 66.6342, "num_token_overlap": 14.5836, "num_token_query": 37.2904, "num_token_union": 65.25, "num_word_context": 202.1671, "num_word_doc": 49.7201, "num_word_query": 27.9252, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7920.6811, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2734, "query_norm": 1.3705, "queue_k_norm": 1.444, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2904, "sent_len_1": 66.6342, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.9625, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4063, "doc_norm": 1.448, "encoder_q-embeddings": 1118.74, "encoder_q-layer.0": 804.8484, "encoder_q-layer.1": 904.0185, "encoder_q-layer.10": 623.8912, "encoder_q-layer.11": 1465.2002, "encoder_q-layer.2": 1052.5765, "encoder_q-layer.3": 1077.0337, "encoder_q-layer.4": 1086.8481, "encoder_q-layer.5": 962.708, "encoder_q-layer.6": 893.3482, "encoder_q-layer.7": 754.6186, "encoder_q-layer.8": 714.3475, "encoder_q-layer.9": 583.4346, "epoch": 0.66, "inbatch_neg_score": 0.2824, "inbatch_pos_score": 0.9277, "learning_rate": 1.827777777777778e-05, "loss": 3.4063, "norm_diff": 0.0753, "norm_loss": 0.0, "num_token_doc": 66.9982, "num_token_overlap": 14.6104, "num_token_query": 37.4032, "num_token_union": 65.5741, "num_word_context": 202.7124, "num_word_doc": 50.0008, "num_word_query": 27.999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1444.4211, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.3726, "queue_k_norm": 1.4442, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4032, "sent_len_1": 66.9982, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.2475, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.3773, "doc_norm": 1.4409, "encoder_q-embeddings": 752.1369, "encoder_q-layer.0": 528.41, "encoder_q-layer.1": 552.1964, "encoder_q-layer.10": 584.9194, "encoder_q-layer.11": 1518.1699, "encoder_q-layer.2": 645.7087, "encoder_q-layer.3": 689.4952, "encoder_q-layer.4": 751.4766, "encoder_q-layer.5": 717.7501, "encoder_q-layer.6": 813.4114, "encoder_q-layer.7": 774.9098, "encoder_q-layer.8": 697.8849, "encoder_q-layer.9": 569.4827, "epoch": 0.66, "inbatch_neg_score": 0.2835, "inbatch_pos_score": 0.9038, "learning_rate": 1.8222222222222224e-05, "loss": 3.3773, "norm_diff": 0.0696, "norm_loss": 0.0, "num_token_doc": 66.6076, "num_token_overlap": 14.6206, "num_token_query": 37.332, "num_token_union": 65.2166, "num_word_context": 202.0421, "num_word_doc": 49.7174, "num_word_query": 27.9607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1174.7385, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2822, "query_norm": 1.3713, "queue_k_norm": 1.4469, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.332, "sent_len_1": 66.6076, "sent_len_max_0": 127.9887, "sent_len_max_1": 186.3288, "stdk": 0.0485, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.4113, "doc_norm": 1.4489, "encoder_q-embeddings": 1105.5887, "encoder_q-layer.0": 821.8786, "encoder_q-layer.1": 866.0023, "encoder_q-layer.10": 650.0221, "encoder_q-layer.11": 1534.5321, "encoder_q-layer.2": 1000.6233, "encoder_q-layer.3": 980.9161, "encoder_q-layer.4": 994.3727, "encoder_q-layer.5": 885.3506, "encoder_q-layer.6": 791.5531, "encoder_q-layer.7": 901.1394, "encoder_q-layer.8": 823.5089, "encoder_q-layer.9": 604.7844, "epoch": 0.66, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 0.9414, "learning_rate": 1.8166666666666667e-05, "loss": 3.4113, "norm_diff": 0.061, "norm_loss": 0.0, "num_token_doc": 66.8079, "num_token_overlap": 14.5412, "num_token_query": 37.217, "num_token_union": 65.3599, "num_word_context": 202.1653, "num_word_doc": 49.8161, "num_word_query": 27.8434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1434.4978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3879, "queue_k_norm": 1.4448, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.217, "sent_len_1": 66.8079, "sent_len_max_0": 127.995, "sent_len_max_1": 192.23, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3907, "doc_norm": 1.4446, "encoder_q-embeddings": 600.3412, "encoder_q-layer.0": 411.2727, "encoder_q-layer.1": 435.4689, "encoder_q-layer.10": 561.3924, "encoder_q-layer.11": 1408.9269, "encoder_q-layer.2": 543.7089, "encoder_q-layer.3": 545.5816, "encoder_q-layer.4": 528.8102, "encoder_q-layer.5": 493.2328, "encoder_q-layer.6": 513.8781, "encoder_q-layer.7": 556.5225, "encoder_q-layer.8": 617.1475, "encoder_q-layer.9": 533.3741, "epoch": 0.66, "inbatch_neg_score": 0.2989, "inbatch_pos_score": 0.9326, "learning_rate": 1.8111111111111112e-05, "loss": 3.3907, "norm_diff": 0.0744, "norm_loss": 0.0, "num_token_doc": 66.6914, "num_token_overlap": 14.5816, "num_token_query": 37.3453, "num_token_union": 65.3499, "num_word_context": 202.3459, "num_word_doc": 49.7318, "num_word_query": 27.9396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 979.7194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.3702, "queue_k_norm": 1.447, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3453, "sent_len_1": 66.6914, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2625, "stdk": 0.0485, "stdq": 0.0435, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.3991, "doc_norm": 1.4473, "encoder_q-embeddings": 2336.2625, "encoder_q-layer.0": 1750.5388, "encoder_q-layer.1": 1875.6476, "encoder_q-layer.10": 596.0314, "encoder_q-layer.11": 1561.7711, "encoder_q-layer.2": 2563.0884, "encoder_q-layer.3": 2557.5903, "encoder_q-layer.4": 2813.5632, "encoder_q-layer.5": 2385.6384, "encoder_q-layer.6": 1812.4191, "encoder_q-layer.7": 1415.2133, "encoder_q-layer.8": 1239.0547, "encoder_q-layer.9": 663.2217, "epoch": 0.66, "inbatch_neg_score": 0.3043, "inbatch_pos_score": 0.9224, "learning_rate": 1.8055555555555555e-05, "loss": 3.3991, "norm_diff": 0.049, "norm_loss": 0.0, "num_token_doc": 67.0288, "num_token_overlap": 14.5747, "num_token_query": 37.3601, "num_token_union": 65.5282, "num_word_context": 202.7009, "num_word_doc": 50.0143, "num_word_query": 27.9611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2924.1136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.3989, "queue_k_norm": 1.448, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3601, "sent_len_1": 67.0288, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6075, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3978, "doc_norm": 1.4532, "encoder_q-embeddings": 906.0259, "encoder_q-layer.0": 624.6652, "encoder_q-layer.1": 667.6838, "encoder_q-layer.10": 589.7686, "encoder_q-layer.11": 1462.4329, "encoder_q-layer.2": 850.2324, "encoder_q-layer.3": 919.2817, "encoder_q-layer.4": 915.2306, "encoder_q-layer.5": 972.7401, "encoder_q-layer.6": 898.8588, "encoder_q-layer.7": 878.8203, "encoder_q-layer.8": 808.9293, "encoder_q-layer.9": 595.0822, "epoch": 0.66, "inbatch_neg_score": 0.2997, "inbatch_pos_score": 0.9438, "learning_rate": 1.8e-05, "loss": 3.3978, "norm_diff": 0.0855, "norm_loss": 0.0, "num_token_doc": 66.8494, "num_token_overlap": 14.6658, "num_token_query": 37.5866, "num_token_union": 65.5219, "num_word_context": 202.4463, "num_word_doc": 49.8959, "num_word_query": 28.1398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1328.7333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2996, "query_norm": 1.3677, "queue_k_norm": 1.4493, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5866, "sent_len_1": 66.8494, "sent_len_max_0": 127.9675, "sent_len_max_1": 187.945, "stdk": 0.0488, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3872, "doc_norm": 1.443, "encoder_q-embeddings": 759.0338, "encoder_q-layer.0": 557.3801, "encoder_q-layer.1": 590.1339, "encoder_q-layer.10": 633.4136, "encoder_q-layer.11": 1542.3418, "encoder_q-layer.2": 711.6965, "encoder_q-layer.3": 736.2478, "encoder_q-layer.4": 762.6618, "encoder_q-layer.5": 736.7606, "encoder_q-layer.6": 821.5129, "encoder_q-layer.7": 793.9413, "encoder_q-layer.8": 727.0165, "encoder_q-layer.9": 597.3857, "epoch": 0.66, "inbatch_neg_score": 0.3065, "inbatch_pos_score": 0.9438, "learning_rate": 1.7944444444444443e-05, "loss": 3.3872, "norm_diff": 0.069, "norm_loss": 0.0, "num_token_doc": 66.6068, "num_token_overlap": 14.5368, "num_token_query": 37.2583, "num_token_union": 65.3143, "num_word_context": 202.217, "num_word_doc": 49.6993, "num_word_query": 27.8875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3044, "query_norm": 1.374, "queue_k_norm": 1.4496, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2583, "sent_len_1": 66.6068, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5687, "stdk": 0.0484, "stdq": 0.0439, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3966, "doc_norm": 1.4511, "encoder_q-embeddings": 1249.865, "encoder_q-layer.0": 896.0841, "encoder_q-layer.1": 901.1432, "encoder_q-layer.10": 1158.3821, "encoder_q-layer.11": 2849.3931, "encoder_q-layer.2": 1095.0688, "encoder_q-layer.3": 1060.8201, "encoder_q-layer.4": 1085.4799, "encoder_q-layer.5": 1067.0327, "encoder_q-layer.6": 1077.5874, "encoder_q-layer.7": 1107.6687, "encoder_q-layer.8": 1217.0156, "encoder_q-layer.9": 1098.9666, "epoch": 0.66, "inbatch_neg_score": 0.3056, "inbatch_pos_score": 0.9561, "learning_rate": 1.788888888888889e-05, "loss": 3.3966, "norm_diff": 0.0616, "norm_loss": 0.0, "num_token_doc": 66.6405, "num_token_overlap": 14.5545, "num_token_query": 37.2652, "num_token_union": 65.3186, "num_word_context": 202.2538, "num_word_doc": 49.7459, "num_word_query": 27.9026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2018.3142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3047, "query_norm": 1.3895, "queue_k_norm": 1.4521, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2652, "sent_len_1": 66.6405, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7225, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4291, "doc_norm": 1.4531, "encoder_q-embeddings": 1015.6491, "encoder_q-layer.0": 677.1578, "encoder_q-layer.1": 718.6456, "encoder_q-layer.10": 1296.1844, "encoder_q-layer.11": 3103.7603, "encoder_q-layer.2": 799.7303, "encoder_q-layer.3": 823.7114, "encoder_q-layer.4": 859.3074, "encoder_q-layer.5": 898.9198, "encoder_q-layer.6": 1016.4078, "encoder_q-layer.7": 1115.8861, "encoder_q-layer.8": 1321.2045, "encoder_q-layer.9": 1246.9752, "epoch": 0.66, "inbatch_neg_score": 0.3071, "inbatch_pos_score": 0.96, "learning_rate": 1.7833333333333334e-05, "loss": 3.4291, "norm_diff": 0.0754, "norm_loss": 0.0, "num_token_doc": 66.5898, "num_token_overlap": 14.5554, "num_token_query": 37.0901, "num_token_union": 65.1476, "num_word_context": 201.6835, "num_word_doc": 49.6667, "num_word_query": 27.7583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1934.9471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3074, "query_norm": 1.3777, "queue_k_norm": 1.4526, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.0901, "sent_len_1": 66.5898, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.3063, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.4129, "doc_norm": 1.4545, "encoder_q-embeddings": 1274.8077, "encoder_q-layer.0": 885.535, "encoder_q-layer.1": 977.1939, "encoder_q-layer.10": 1132.9895, "encoder_q-layer.11": 2923.04, "encoder_q-layer.2": 1066.8253, "encoder_q-layer.3": 1057.7711, "encoder_q-layer.4": 1138.3605, "encoder_q-layer.5": 1171.58, "encoder_q-layer.6": 1232.2177, "encoder_q-layer.7": 1406.9102, "encoder_q-layer.8": 1381.8092, "encoder_q-layer.9": 1147.8588, "epoch": 0.66, "inbatch_neg_score": 0.3047, "inbatch_pos_score": 0.9541, "learning_rate": 1.777777777777778e-05, "loss": 3.4129, "norm_diff": 0.0772, "norm_loss": 0.0, "num_token_doc": 66.8759, "num_token_overlap": 14.6404, "num_token_query": 37.3803, "num_token_union": 65.4169, "num_word_context": 202.1771, "num_word_doc": 49.8913, "num_word_query": 27.9793, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2090.7862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.3774, "queue_k_norm": 1.454, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3803, "sent_len_1": 66.8759, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.11, "stdk": 0.0488, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.4084, "doc_norm": 1.4542, "encoder_q-embeddings": 3307.3953, "encoder_q-layer.0": 2502.0447, "encoder_q-layer.1": 2877.3604, "encoder_q-layer.10": 1241.6456, "encoder_q-layer.11": 2975.2227, "encoder_q-layer.2": 3280.5457, "encoder_q-layer.3": 3280.4995, "encoder_q-layer.4": 2920.0405, "encoder_q-layer.5": 2090.907, "encoder_q-layer.6": 2361.8762, "encoder_q-layer.7": 2415.3547, "encoder_q-layer.8": 1838.5913, "encoder_q-layer.9": 1214.7446, "epoch": 0.66, "inbatch_neg_score": 0.3061, "inbatch_pos_score": 0.9585, "learning_rate": 1.7722222222222222e-05, "loss": 3.4084, "norm_diff": 0.0831, "norm_loss": 0.0, "num_token_doc": 66.7383, "num_token_overlap": 14.5634, "num_token_query": 37.2601, "num_token_union": 65.328, "num_word_context": 202.2332, "num_word_doc": 49.7933, "num_word_query": 27.8557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4027.4029, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3066, "query_norm": 1.3711, "queue_k_norm": 1.4542, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2601, "sent_len_1": 66.7383, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6175, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.404, "doc_norm": 1.4522, "encoder_q-embeddings": 2499.5725, "encoder_q-layer.0": 1697.2145, "encoder_q-layer.1": 2053.6213, "encoder_q-layer.10": 1199.7832, "encoder_q-layer.11": 3027.6716, "encoder_q-layer.2": 2476.6208, "encoder_q-layer.3": 3011.4204, "encoder_q-layer.4": 2653.167, "encoder_q-layer.5": 2845.2512, "encoder_q-layer.6": 2759.7803, "encoder_q-layer.7": 2536.814, "encoder_q-layer.8": 1764.9036, "encoder_q-layer.9": 1185.2908, "epoch": 0.67, "inbatch_neg_score": 0.3001, "inbatch_pos_score": 0.9648, "learning_rate": 1.7666666666666668e-05, "loss": 3.404, "norm_diff": 0.0798, "norm_loss": 0.0, "num_token_doc": 66.6309, "num_token_overlap": 14.5823, "num_token_query": 37.3583, "num_token_union": 65.3279, "num_word_context": 202.0196, "num_word_doc": 49.6784, "num_word_query": 27.9674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3531.4717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.3724, "queue_k_norm": 1.4524, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3583, "sent_len_1": 66.6309, "sent_len_max_0": 127.995, "sent_len_max_1": 189.48, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.398, "doc_norm": 1.4521, "encoder_q-embeddings": 573.096, "encoder_q-layer.0": 385.9935, "encoder_q-layer.1": 417.7903, "encoder_q-layer.10": 598.4065, "encoder_q-layer.11": 1482.9448, "encoder_q-layer.2": 478.454, "encoder_q-layer.3": 489.3651, "encoder_q-layer.4": 500.3228, "encoder_q-layer.5": 508.2033, "encoder_q-layer.6": 560.5526, "encoder_q-layer.7": 645.8123, "encoder_q-layer.8": 697.6578, "encoder_q-layer.9": 596.0461, "epoch": 0.67, "inbatch_neg_score": 0.3019, "inbatch_pos_score": 0.938, "learning_rate": 1.761111111111111e-05, "loss": 3.398, "norm_diff": 0.0788, "norm_loss": 0.0, "num_token_doc": 66.7488, "num_token_overlap": 14.5433, "num_token_query": 37.3225, "num_token_union": 65.3926, "num_word_context": 202.2326, "num_word_doc": 49.793, "num_word_query": 27.9505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1007.5249, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3013, "query_norm": 1.3732, "queue_k_norm": 1.457, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3225, "sent_len_1": 66.7488, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4625, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.4071, "doc_norm": 1.4549, "encoder_q-embeddings": 774.9844, "encoder_q-layer.0": 535.8259, "encoder_q-layer.1": 583.4207, "encoder_q-layer.10": 643.368, "encoder_q-layer.11": 1491.4785, "encoder_q-layer.2": 689.8941, "encoder_q-layer.3": 674.6338, "encoder_q-layer.4": 741.4689, "encoder_q-layer.5": 723.5004, "encoder_q-layer.6": 832.5041, "encoder_q-layer.7": 785.4719, "encoder_q-layer.8": 728.5921, "encoder_q-layer.9": 594.7555, "epoch": 0.67, "inbatch_neg_score": 0.2966, "inbatch_pos_score": 0.9321, "learning_rate": 1.7555555555555556e-05, "loss": 3.4071, "norm_diff": 0.1066, "norm_loss": 0.0, "num_token_doc": 67.0577, "num_token_overlap": 14.6443, "num_token_query": 37.3078, "num_token_union": 65.4464, "num_word_context": 202.6978, "num_word_doc": 50.0408, "num_word_query": 27.915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1188.7298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.3483, "queue_k_norm": 1.457, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3078, "sent_len_1": 67.0577, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.1037, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.4155, "doc_norm": 1.458, "encoder_q-embeddings": 919.3975, "encoder_q-layer.0": 617.3184, "encoder_q-layer.1": 637.8047, "encoder_q-layer.10": 565.1515, "encoder_q-layer.11": 1593.5811, "encoder_q-layer.2": 751.2105, "encoder_q-layer.3": 833.3803, "encoder_q-layer.4": 903.1632, "encoder_q-layer.5": 787.2347, "encoder_q-layer.6": 824.715, "encoder_q-layer.7": 839.8885, "encoder_q-layer.8": 758.215, "encoder_q-layer.9": 562.4689, "epoch": 0.67, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 0.9253, "learning_rate": 1.75e-05, "loss": 3.4155, "norm_diff": 0.1206, "norm_loss": 0.0, "num_token_doc": 66.5825, "num_token_overlap": 14.5877, "num_token_query": 37.4486, "num_token_union": 65.3356, "num_word_context": 202.3354, "num_word_doc": 49.6912, "num_word_query": 28.013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1310.6024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.3374, "queue_k_norm": 1.4552, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4486, "sent_len_1": 66.5825, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2025, "stdk": 0.0488, "stdq": 0.0437, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4043, "doc_norm": 1.4597, "encoder_q-embeddings": 940.5117, "encoder_q-layer.0": 661.8689, "encoder_q-layer.1": 763.6303, "encoder_q-layer.10": 643.1786, "encoder_q-layer.11": 1540.6926, "encoder_q-layer.2": 906.1849, "encoder_q-layer.3": 924.9929, "encoder_q-layer.4": 940.8071, "encoder_q-layer.5": 833.7938, "encoder_q-layer.6": 786.4321, "encoder_q-layer.7": 808.6493, "encoder_q-layer.8": 736.2874, "encoder_q-layer.9": 597.6022, "epoch": 0.67, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 0.9463, "learning_rate": 1.7444444444444448e-05, "loss": 3.4043, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.6955, "num_token_overlap": 14.6354, "num_token_query": 37.6315, "num_token_union": 65.5159, "num_word_context": 202.6175, "num_word_doc": 49.7755, "num_word_query": 28.1805, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1342.7602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.3499, "queue_k_norm": 1.455, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.6315, "sent_len_1": 66.6955, "sent_len_max_0": 128.0, "sent_len_max_1": 188.695, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4303, "doc_norm": 1.4508, "encoder_q-embeddings": 805.5306, "encoder_q-layer.0": 577.4064, "encoder_q-layer.1": 622.0125, "encoder_q-layer.10": 553.8903, "encoder_q-layer.11": 1473.2494, "encoder_q-layer.2": 734.2662, "encoder_q-layer.3": 771.8441, "encoder_q-layer.4": 790.7411, "encoder_q-layer.5": 825.602, "encoder_q-layer.6": 871.0064, "encoder_q-layer.7": 932.9311, "encoder_q-layer.8": 881.0654, "encoder_q-layer.9": 581.0867, "epoch": 0.67, "inbatch_neg_score": 0.2919, "inbatch_pos_score": 0.9282, "learning_rate": 1.738888888888889e-05, "loss": 3.4303, "norm_diff": 0.1113, "norm_loss": 0.0, "num_token_doc": 66.5861, "num_token_overlap": 14.5328, "num_token_query": 37.1067, "num_token_union": 65.1941, "num_word_context": 201.8886, "num_word_doc": 49.7216, "num_word_query": 27.7648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1277.0591, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.3395, "queue_k_norm": 1.4559, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1067, "sent_len_1": 66.5861, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.2725, "stdk": 0.0485, "stdq": 0.0438, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.4137, "doc_norm": 1.454, "encoder_q-embeddings": 1004.1126, "encoder_q-layer.0": 702.283, "encoder_q-layer.1": 749.4725, "encoder_q-layer.10": 593.7491, "encoder_q-layer.11": 1514.3704, "encoder_q-layer.2": 808.2782, "encoder_q-layer.3": 885.7931, "encoder_q-layer.4": 984.827, "encoder_q-layer.5": 919.5892, "encoder_q-layer.6": 1022.6495, "encoder_q-layer.7": 995.3961, "encoder_q-layer.8": 939.0556, "encoder_q-layer.9": 575.5181, "epoch": 0.67, "inbatch_neg_score": 0.2897, "inbatch_pos_score": 0.9238, "learning_rate": 1.7333333333333336e-05, "loss": 3.4137, "norm_diff": 0.1206, "norm_loss": 0.0, "num_token_doc": 66.8273, "num_token_overlap": 14.5732, "num_token_query": 37.236, "num_token_union": 65.3237, "num_word_context": 202.1163, "num_word_doc": 49.8297, "num_word_query": 27.8741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1391.7903, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2898, "query_norm": 1.3333, "queue_k_norm": 1.4549, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.236, "sent_len_1": 66.8273, "sent_len_max_0": 128.0, "sent_len_max_1": 192.345, "stdk": 0.0486, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.4019, "doc_norm": 1.4538, "encoder_q-embeddings": 837.0206, "encoder_q-layer.0": 563.722, "encoder_q-layer.1": 625.5182, "encoder_q-layer.10": 570.1448, "encoder_q-layer.11": 1554.9924, "encoder_q-layer.2": 728.492, "encoder_q-layer.3": 769.5261, "encoder_q-layer.4": 861.1173, "encoder_q-layer.5": 907.0508, "encoder_q-layer.6": 819.5247, "encoder_q-layer.7": 752.9194, "encoder_q-layer.8": 710.1769, "encoder_q-layer.9": 582.8432, "epoch": 0.67, "inbatch_neg_score": 0.2893, "inbatch_pos_score": 0.9185, "learning_rate": 1.7277777777777778e-05, "loss": 3.4019, "norm_diff": 0.1226, "norm_loss": 0.0, "num_token_doc": 66.9076, "num_token_overlap": 14.6353, "num_token_query": 37.3263, "num_token_union": 65.3809, "num_word_context": 202.2741, "num_word_doc": 49.9349, "num_word_query": 27.9632, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1254.2202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2896, "query_norm": 1.3312, "queue_k_norm": 1.4531, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3263, "sent_len_1": 66.9076, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.8425, "stdk": 0.0486, "stdq": 0.0435, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4067, "doc_norm": 1.4553, "encoder_q-embeddings": 689.8578, "encoder_q-layer.0": 475.56, "encoder_q-layer.1": 526.6548, "encoder_q-layer.10": 563.714, "encoder_q-layer.11": 1397.0109, "encoder_q-layer.2": 640.2271, "encoder_q-layer.3": 702.5137, "encoder_q-layer.4": 716.7253, "encoder_q-layer.5": 680.8, "encoder_q-layer.6": 708.1445, "encoder_q-layer.7": 662.8525, "encoder_q-layer.8": 637.845, "encoder_q-layer.9": 549.9581, "epoch": 0.67, "inbatch_neg_score": 0.284, "inbatch_pos_score": 0.9321, "learning_rate": 1.7222222222222224e-05, "loss": 3.4067, "norm_diff": 0.116, "norm_loss": 0.0, "num_token_doc": 67.0436, "num_token_overlap": 14.6719, "num_token_query": 37.4024, "num_token_union": 65.5177, "num_word_context": 202.597, "num_word_doc": 50.0541, "num_word_query": 27.9853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1097.647, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.3393, "queue_k_norm": 1.4539, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4024, "sent_len_1": 67.0436, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.9313, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.4059, "doc_norm": 1.4652, "encoder_q-embeddings": 759.3846, "encoder_q-layer.0": 531.6034, "encoder_q-layer.1": 547.5856, "encoder_q-layer.10": 665.5981, "encoder_q-layer.11": 1680.5865, "encoder_q-layer.2": 643.8361, "encoder_q-layer.3": 647.3212, "encoder_q-layer.4": 713.514, "encoder_q-layer.5": 690.0012, "encoder_q-layer.6": 771.1481, "encoder_q-layer.7": 836.5098, "encoder_q-layer.8": 809.1539, "encoder_q-layer.9": 624.8545, "epoch": 0.67, "inbatch_neg_score": 0.2854, "inbatch_pos_score": 0.9019, "learning_rate": 1.7166666666666666e-05, "loss": 3.4059, "norm_diff": 0.1251, "norm_loss": 0.0, "num_token_doc": 67.0071, "num_token_overlap": 14.6201, "num_token_query": 37.376, "num_token_union": 65.5147, "num_word_context": 202.5437, "num_word_doc": 49.9836, "num_word_query": 27.9977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1231.0371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3402, "queue_k_norm": 1.4548, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.376, "sent_len_1": 67.0071, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6138, "stdk": 0.0491, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3984, "doc_norm": 1.4478, "encoder_q-embeddings": 1832.1514, "encoder_q-layer.0": 1307.4612, "encoder_q-layer.1": 1440.6552, "encoder_q-layer.10": 578.0425, "encoder_q-layer.11": 1514.8198, "encoder_q-layer.2": 1480.6295, "encoder_q-layer.3": 1440.3754, "encoder_q-layer.4": 1190.5674, "encoder_q-layer.5": 1000.3004, "encoder_q-layer.6": 968.8151, "encoder_q-layer.7": 964.5848, "encoder_q-layer.8": 782.7304, "encoder_q-layer.9": 566.9896, "epoch": 0.68, "inbatch_neg_score": 0.2787, "inbatch_pos_score": 0.9097, "learning_rate": 1.7111111111111112e-05, "loss": 3.3984, "norm_diff": 0.1064, "norm_loss": 0.0, "num_token_doc": 66.7837, "num_token_overlap": 14.6046, "num_token_query": 37.4234, "num_token_union": 65.4117, "num_word_context": 202.2059, "num_word_doc": 49.874, "num_word_query": 28.0141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1949.8033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2783, "query_norm": 1.3414, "queue_k_norm": 1.4541, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4234, "sent_len_1": 66.7837, "sent_len_max_0": 128.0, "sent_len_max_1": 186.8313, "stdk": 0.0484, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.3952, "doc_norm": 1.4523, "encoder_q-embeddings": 4943.1489, "encoder_q-layer.0": 3268.1599, "encoder_q-layer.1": 3542.2085, "encoder_q-layer.10": 591.9766, "encoder_q-layer.11": 1533.166, "encoder_q-layer.2": 4258.6118, "encoder_q-layer.3": 4796.104, "encoder_q-layer.4": 4392.8105, "encoder_q-layer.5": 4428.5078, "encoder_q-layer.6": 3894.0425, "encoder_q-layer.7": 3874.0928, "encoder_q-layer.8": 2183.9407, "encoder_q-layer.9": 680.9589, "epoch": 0.68, "inbatch_neg_score": 0.2758, "inbatch_pos_score": 0.9116, "learning_rate": 1.7055555555555554e-05, "loss": 3.3952, "norm_diff": 0.1045, "norm_loss": 0.0, "num_token_doc": 66.8936, "num_token_overlap": 14.6303, "num_token_query": 37.4656, "num_token_union": 65.4751, "num_word_context": 202.5596, "num_word_doc": 49.8866, "num_word_query": 28.0542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5519.3308, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2766, "query_norm": 1.3478, "queue_k_norm": 1.454, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4656, "sent_len_1": 66.8936, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7175, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3887, "doc_norm": 1.4559, "encoder_q-embeddings": 604.3062, "encoder_q-layer.0": 386.2988, "encoder_q-layer.1": 440.7671, "encoder_q-layer.10": 605.5468, "encoder_q-layer.11": 1493.4863, "encoder_q-layer.2": 467.0154, "encoder_q-layer.3": 478.0391, "encoder_q-layer.4": 492.4678, "encoder_q-layer.5": 495.2052, "encoder_q-layer.6": 550.6137, "encoder_q-layer.7": 570.0284, "encoder_q-layer.8": 651.6184, "encoder_q-layer.9": 578.7325, "epoch": 0.68, "inbatch_neg_score": 0.2788, "inbatch_pos_score": 0.9287, "learning_rate": 1.7000000000000003e-05, "loss": 3.3887, "norm_diff": 0.1175, "norm_loss": 0.0, "num_token_doc": 66.9208, "num_token_overlap": 14.6334, "num_token_query": 37.5715, "num_token_union": 65.5693, "num_word_context": 202.3341, "num_word_doc": 49.9546, "num_word_query": 28.1566, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1018.7294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2781, "query_norm": 1.3383, "queue_k_norm": 1.4541, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5715, "sent_len_1": 66.9208, "sent_len_max_0": 128.0, "sent_len_max_1": 187.715, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3889, "doc_norm": 1.4559, "encoder_q-embeddings": 564.3395, "encoder_q-layer.0": 381.6777, "encoder_q-layer.1": 404.2704, "encoder_q-layer.10": 590.7677, "encoder_q-layer.11": 1495.8513, "encoder_q-layer.2": 435.3434, "encoder_q-layer.3": 440.7077, "encoder_q-layer.4": 442.0449, "encoder_q-layer.5": 440.1718, "encoder_q-layer.6": 506.8006, "encoder_q-layer.7": 565.061, "encoder_q-layer.8": 647.1529, "encoder_q-layer.9": 584.1232, "epoch": 0.68, "inbatch_neg_score": 0.2723, "inbatch_pos_score": 0.9087, "learning_rate": 1.6944444444444446e-05, "loss": 3.3889, "norm_diff": 0.1256, "norm_loss": 0.0, "num_token_doc": 66.9204, "num_token_overlap": 14.6826, "num_token_query": 37.5998, "num_token_union": 65.569, "num_word_context": 202.7344, "num_word_doc": 49.9414, "num_word_query": 28.1663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 985.9971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2722, "query_norm": 1.3304, "queue_k_norm": 1.4524, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5998, "sent_len_1": 66.9204, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.4209, "doc_norm": 1.4552, "encoder_q-embeddings": 781.1636, "encoder_q-layer.0": 568.1979, "encoder_q-layer.1": 608.5515, "encoder_q-layer.10": 628.2598, "encoder_q-layer.11": 1511.1455, "encoder_q-layer.2": 698.0287, "encoder_q-layer.3": 713.1068, "encoder_q-layer.4": 676.9976, "encoder_q-layer.5": 679.8123, "encoder_q-layer.6": 704.6051, "encoder_q-layer.7": 680.6644, "encoder_q-layer.8": 711.3358, "encoder_q-layer.9": 605.564, "epoch": 0.68, "inbatch_neg_score": 0.2658, "inbatch_pos_score": 0.918, "learning_rate": 1.688888888888889e-05, "loss": 3.4209, "norm_diff": 0.1225, "norm_loss": 0.0, "num_token_doc": 66.6127, "num_token_overlap": 14.5899, "num_token_query": 37.4588, "num_token_union": 65.3516, "num_word_context": 202.026, "num_word_doc": 49.6895, "num_word_query": 28.0368, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1171.8226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2671, "query_norm": 1.3328, "queue_k_norm": 1.4528, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4588, "sent_len_1": 66.6127, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.3275, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4044, "doc_norm": 1.4518, "encoder_q-embeddings": 535.7347, "encoder_q-layer.0": 339.6968, "encoder_q-layer.1": 343.3197, "encoder_q-layer.10": 612.0564, "encoder_q-layer.11": 1466.9387, "encoder_q-layer.2": 377.7724, "encoder_q-layer.3": 389.3412, "encoder_q-layer.4": 417.2857, "encoder_q-layer.5": 417.7535, "encoder_q-layer.6": 508.3432, "encoder_q-layer.7": 548.1061, "encoder_q-layer.8": 665.5487, "encoder_q-layer.9": 597.1588, "epoch": 0.68, "inbatch_neg_score": 0.2703, "inbatch_pos_score": 0.9258, "learning_rate": 1.6833333333333334e-05, "loss": 3.4044, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.8029, "num_token_overlap": 14.5221, "num_token_query": 37.1613, "num_token_union": 65.3528, "num_word_context": 202.3118, "num_word_doc": 49.884, "num_word_query": 27.841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 956.5995, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.271, "query_norm": 1.3369, "queue_k_norm": 1.4513, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1613, "sent_len_1": 66.8029, "sent_len_max_0": 128.0, "sent_len_max_1": 188.34, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4168, "doc_norm": 1.4475, "encoder_q-embeddings": 680.9138, "encoder_q-layer.0": 499.2996, "encoder_q-layer.1": 566.7552, "encoder_q-layer.10": 589.4313, "encoder_q-layer.11": 1489.2252, "encoder_q-layer.2": 656.5256, "encoder_q-layer.3": 687.5579, "encoder_q-layer.4": 653.4742, "encoder_q-layer.5": 627.3608, "encoder_q-layer.6": 661.6631, "encoder_q-layer.7": 655.037, "encoder_q-layer.8": 666.9147, "encoder_q-layer.9": 536.8701, "epoch": 0.68, "inbatch_neg_score": 0.2685, "inbatch_pos_score": 0.9126, "learning_rate": 1.677777777777778e-05, "loss": 3.4168, "norm_diff": 0.1164, "norm_loss": 0.0, "num_token_doc": 66.8658, "num_token_overlap": 14.5138, "num_token_query": 37.3393, "num_token_union": 65.4687, "num_word_context": 202.359, "num_word_doc": 49.8858, "num_word_query": 27.9513, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1114.8351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2683, "query_norm": 1.3312, "queue_k_norm": 1.4504, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3393, "sent_len_1": 66.8658, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5012, "stdk": 0.0485, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.4202, "doc_norm": 1.4576, "encoder_q-embeddings": 845.0373, "encoder_q-layer.0": 572.7551, "encoder_q-layer.1": 647.3808, "encoder_q-layer.10": 621.2742, "encoder_q-layer.11": 1539.0383, "encoder_q-layer.2": 766.153, "encoder_q-layer.3": 820.8504, "encoder_q-layer.4": 861.8605, "encoder_q-layer.5": 823.2385, "encoder_q-layer.6": 872.5737, "encoder_q-layer.7": 896.5578, "encoder_q-layer.8": 893.9886, "encoder_q-layer.9": 611.115, "epoch": 0.68, "inbatch_neg_score": 0.266, "inbatch_pos_score": 0.9316, "learning_rate": 1.6722222222222222e-05, "loss": 3.4202, "norm_diff": 0.1354, "norm_loss": 0.0, "num_token_doc": 66.8762, "num_token_overlap": 14.5553, "num_token_query": 37.2942, "num_token_union": 65.4108, "num_word_context": 202.4604, "num_word_doc": 49.9081, "num_word_query": 27.9322, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1307.4834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2676, "query_norm": 1.3222, "queue_k_norm": 1.4493, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2942, "sent_len_1": 66.8762, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.195, "stdk": 0.049, "stdq": 0.0436, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.4077, "doc_norm": 1.4463, "encoder_q-embeddings": 582.739, "encoder_q-layer.0": 416.0242, "encoder_q-layer.1": 449.6866, "encoder_q-layer.10": 552.3844, "encoder_q-layer.11": 1448.0017, "encoder_q-layer.2": 508.0376, "encoder_q-layer.3": 550.557, "encoder_q-layer.4": 591.7698, "encoder_q-layer.5": 611.727, "encoder_q-layer.6": 557.5645, "encoder_q-layer.7": 572.0686, "encoder_q-layer.8": 615.3727, "encoder_q-layer.9": 546.1384, "epoch": 0.68, "inbatch_neg_score": 0.2678, "inbatch_pos_score": 0.9321, "learning_rate": 1.6666666666666667e-05, "loss": 3.4077, "norm_diff": 0.1127, "norm_loss": 0.0, "num_token_doc": 66.6529, "num_token_overlap": 14.5558, "num_token_query": 37.2203, "num_token_union": 65.2847, "num_word_context": 202.0646, "num_word_doc": 49.7743, "num_word_query": 27.8718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1008.6795, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2686, "query_norm": 1.3335, "queue_k_norm": 1.4495, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2203, "sent_len_1": 66.6529, "sent_len_max_0": 127.995, "sent_len_max_1": 188.5613, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 27.6481, "dev_samples_per_second": 2.315, "dev_steps_per_second": 0.036, "epoch": 0.68, "step": 70000, "test_accuracy": 93.24951171875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3935632109642029, "test_doc_norm": 1.4284110069274902, "test_inbatch_neg_score": 0.6072421669960022, "test_inbatch_pos_score": 1.5491682291030884, "test_loss": 0.3935632109642029, "test_loss_align": 1.021837830543518, "test_loss_unif": 3.848909854888916, "test_loss_unif_q@queue": 3.848909854888916, "test_norm_diff": 0.010123437270522118, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2610011696815491, "test_query_norm": 1.4306535720825195, "test_queue_k_norm": 1.4492285251617432, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042865484952926636, "test_stdq": 0.04238612949848175, "test_stdqueue_k": 0.04875386878848076, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.6481, "dev_samples_per_second": 2.315, "dev_steps_per_second": 0.036, "epoch": 0.68, "eval_beir-arguana_ndcg@10": 0.3801, "eval_beir-arguana_recall@10": 0.64225, "eval_beir-arguana_recall@100": 0.9367, "eval_beir-arguana_recall@20": 0.77809, "eval_beir-avg_ndcg@10": 0.3798123333333333, "eval_beir-avg_recall@10": 0.4493001666666667, "eval_beir-avg_recall@100": 0.6286227500000001, "eval_beir-avg_recall@20": 0.5095429166666665, "eval_beir-cqadupstack_ndcg@10": 0.26535333333333333, "eval_beir-cqadupstack_recall@10": 0.35926166666666665, "eval_beir-cqadupstack_recall@100": 0.5887875, "eval_beir-cqadupstack_recall@20": 0.4262991666666667, "eval_beir-fiqa_ndcg@10": 0.22796, "eval_beir-fiqa_recall@10": 0.28283, "eval_beir-fiqa_recall@100": 0.54347, "eval_beir-fiqa_recall@20": 0.3709, "eval_beir-nfcorpus_ndcg@10": 0.3018, "eval_beir-nfcorpus_recall@10": 0.14895, "eval_beir-nfcorpus_recall@100": 0.28944, "eval_beir-nfcorpus_recall@20": 0.18028, "eval_beir-nq_ndcg@10": 0.26742, "eval_beir-nq_recall@10": 0.44402, "eval_beir-nq_recall@100": 0.77716, "eval_beir-nq_recall@20": 0.55451, "eval_beir-quora_ndcg@10": 0.77295, "eval_beir-quora_recall@10": 0.88596, "eval_beir-quora_recall@100": 0.97761, "eval_beir-quora_recall@20": 0.92723, "eval_beir-scidocs_ndcg@10": 0.14184, "eval_beir-scidocs_recall@10": 0.15207, "eval_beir-scidocs_recall@100": 0.35537, "eval_beir-scidocs_recall@20": 0.20867, "eval_beir-scifact_ndcg@10": 0.64008, "eval_beir-scifact_recall@10": 0.78633, "eval_beir-scifact_recall@100": 0.90156, "eval_beir-scifact_recall@20": 0.83244, "eval_beir-trec-covid_ndcg@10": 0.6157, "eval_beir-trec-covid_recall@10": 0.656, "eval_beir-trec-covid_recall@100": 0.4694, "eval_beir-trec-covid_recall@20": 0.604, "eval_beir-webis-touche2020_ndcg@10": 0.18492, "eval_beir-webis-touche2020_recall@10": 0.13533, "eval_beir-webis-touche2020_recall@100": 0.44673, "eval_beir-webis-touche2020_recall@20": 0.21301, "eval_senteval-avg_sts": 0.7508701576090422, "eval_senteval-sickr_spearman": 0.7213807935875644, "eval_senteval-stsb_spearman": 0.78035952163052, "step": 70000, "test_accuracy": 93.24951171875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3935632109642029, "test_doc_norm": 1.4284110069274902, "test_inbatch_neg_score": 0.6072421669960022, "test_inbatch_pos_score": 1.5491682291030884, "test_loss": 0.3935632109642029, "test_loss_align": 1.021837830543518, "test_loss_unif": 3.848909854888916, "test_loss_unif_q@queue": 3.848909854888916, "test_norm_diff": 0.010123437270522118, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2610011696815491, "test_query_norm": 1.4306535720825195, "test_queue_k_norm": 1.4492285251617432, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042865484952926636, "test_stdq": 0.04238612949848175, "test_stdqueue_k": 0.04875386878848076, "test_stdqueue_q": 0.0 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.405, "doc_norm": 1.4506, "encoder_q-embeddings": 757.8336, "encoder_q-layer.0": 535.7092, "encoder_q-layer.1": 583.8653, "encoder_q-layer.10": 577.0841, "encoder_q-layer.11": 1497.0465, "encoder_q-layer.2": 662.9964, "encoder_q-layer.3": 713.0988, "encoder_q-layer.4": 742.0294, "encoder_q-layer.5": 657.6832, "encoder_q-layer.6": 628.8224, "encoder_q-layer.7": 645.5111, "encoder_q-layer.8": 656.5657, "encoder_q-layer.9": 550.9218, "epoch": 0.68, "inbatch_neg_score": 0.2653, "inbatch_pos_score": 0.9209, "learning_rate": 1.661111111111111e-05, "loss": 3.405, "norm_diff": 0.1076, "norm_loss": 0.0, "num_token_doc": 66.9326, "num_token_overlap": 14.6171, "num_token_query": 37.4336, "num_token_union": 65.4863, "num_word_context": 202.5218, "num_word_doc": 49.9443, "num_word_query": 28.0314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1153.811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.3429, "queue_k_norm": 1.4492, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4336, "sent_len_1": 66.9326, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0563, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.4004, "doc_norm": 1.4517, "encoder_q-embeddings": 2400.27, "encoder_q-layer.0": 1629.7999, "encoder_q-layer.1": 1637.7394, "encoder_q-layer.10": 551.4369, "encoder_q-layer.11": 1427.8384, "encoder_q-layer.2": 1872.9922, "encoder_q-layer.3": 1901.5347, "encoder_q-layer.4": 2157.6194, "encoder_q-layer.5": 1783.0657, "encoder_q-layer.6": 1400.9761, "encoder_q-layer.7": 1124.889, "encoder_q-layer.8": 935.7446, "encoder_q-layer.9": 602.636, "epoch": 0.69, "inbatch_neg_score": 0.2622, "inbatch_pos_score": 0.9248, "learning_rate": 1.655555555555556e-05, "loss": 3.4004, "norm_diff": 0.1168, "norm_loss": 0.0, "num_token_doc": 66.9079, "num_token_overlap": 14.5758, "num_token_query": 37.2746, "num_token_union": 65.4071, "num_word_context": 202.4671, "num_word_doc": 49.9413, "num_word_query": 27.8992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2572.5073, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.262, "query_norm": 1.3349, "queue_k_norm": 1.4509, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2746, "sent_len_1": 66.9079, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.2163, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.4055, "doc_norm": 1.453, "encoder_q-embeddings": 1191.516, "encoder_q-layer.0": 807.7988, "encoder_q-layer.1": 820.54, "encoder_q-layer.10": 1340.824, "encoder_q-layer.11": 3077.9167, "encoder_q-layer.2": 910.8726, "encoder_q-layer.3": 923.9241, "encoder_q-layer.4": 940.7668, "encoder_q-layer.5": 928.1858, "encoder_q-layer.6": 1072.407, "encoder_q-layer.7": 1190.6627, "encoder_q-layer.8": 1340.3915, "encoder_q-layer.9": 1224.0767, "epoch": 0.69, "inbatch_neg_score": 0.267, "inbatch_pos_score": 0.9062, "learning_rate": 1.65e-05, "loss": 3.4055, "norm_diff": 0.1198, "norm_loss": 0.0, "num_token_doc": 66.9468, "num_token_overlap": 14.5871, "num_token_query": 37.2764, "num_token_union": 65.4284, "num_word_context": 202.3558, "num_word_doc": 49.9593, "num_word_query": 27.929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.1276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2671, "query_norm": 1.3331, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2764, "sent_len_1": 66.9468, "sent_len_max_0": 127.975, "sent_len_max_1": 188.2837, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3807, "doc_norm": 1.4506, "encoder_q-embeddings": 30361.5469, "encoder_q-layer.0": 21776.248, "encoder_q-layer.1": 21399.1523, "encoder_q-layer.10": 1281.7506, "encoder_q-layer.11": 3114.8059, "encoder_q-layer.2": 24587.5137, "encoder_q-layer.3": 25032.5371, "encoder_q-layer.4": 27518.4336, "encoder_q-layer.5": 23531.8613, "encoder_q-layer.6": 22367.7129, "encoder_q-layer.7": 19860.5918, "encoder_q-layer.8": 17251.8398, "encoder_q-layer.9": 2838.3188, "epoch": 0.69, "inbatch_neg_score": 0.2656, "inbatch_pos_score": 0.9121, "learning_rate": 1.6444444444444447e-05, "loss": 3.3807, "norm_diff": 0.1157, "norm_loss": 0.0, "num_token_doc": 66.6497, "num_token_overlap": 14.6911, "num_token_query": 37.5354, "num_token_union": 65.3592, "num_word_context": 202.5233, "num_word_doc": 49.7071, "num_word_query": 28.138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 33246.1928, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.2656, "query_norm": 1.3349, "queue_k_norm": 1.4488, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5354, "sent_len_1": 66.6497, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6138, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3929, "doc_norm": 1.4487, "encoder_q-embeddings": 1803.5135, "encoder_q-layer.0": 1233.7554, "encoder_q-layer.1": 1306.6604, "encoder_q-layer.10": 1156.3307, "encoder_q-layer.11": 2940.6626, "encoder_q-layer.2": 1576.1398, "encoder_q-layer.3": 1616.4862, "encoder_q-layer.4": 1735.7458, "encoder_q-layer.5": 1768.4347, "encoder_q-layer.6": 1802.4999, "encoder_q-layer.7": 1809.5138, "encoder_q-layer.8": 1644.3105, "encoder_q-layer.9": 1079.0809, "epoch": 0.69, "inbatch_neg_score": 0.263, "inbatch_pos_score": 0.9194, "learning_rate": 1.638888888888889e-05, "loss": 3.3929, "norm_diff": 0.1198, "norm_loss": 0.0, "num_token_doc": 66.9108, "num_token_overlap": 14.687, "num_token_query": 37.5752, "num_token_union": 65.5673, "num_word_context": 202.5403, "num_word_doc": 49.9813, "num_word_query": 28.1683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2617.2036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2632, "query_norm": 1.3289, "queue_k_norm": 1.4476, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5752, "sent_len_1": 66.9108, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4925, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3647, "doc_norm": 1.4466, "encoder_q-embeddings": 1989.0862, "encoder_q-layer.0": 1370.3721, "encoder_q-layer.1": 1450.1592, "encoder_q-layer.10": 1319.595, "encoder_q-layer.11": 2931.0686, "encoder_q-layer.2": 1789.2301, "encoder_q-layer.3": 1804.0957, "encoder_q-layer.4": 1848.5637, "encoder_q-layer.5": 1728.8903, "encoder_q-layer.6": 1731.5009, "encoder_q-layer.7": 1868.566, "encoder_q-layer.8": 1684.2197, "encoder_q-layer.9": 1148.256, "epoch": 0.69, "inbatch_neg_score": 0.2642, "inbatch_pos_score": 0.9106, "learning_rate": 1.6333333333333335e-05, "loss": 3.3647, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.9418, "num_token_overlap": 14.6449, "num_token_query": 37.4042, "num_token_union": 65.4787, "num_word_context": 202.2656, "num_word_doc": 49.9528, "num_word_query": 28.0026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2759.5031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2646, "query_norm": 1.3415, "queue_k_norm": 1.4491, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4042, "sent_len_1": 66.9418, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.1488, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.3876, "doc_norm": 1.4519, "encoder_q-embeddings": 3629.0605, "encoder_q-layer.0": 2490.9102, "encoder_q-layer.1": 2609.7698, "encoder_q-layer.10": 1208.3009, "encoder_q-layer.11": 3186.9009, "encoder_q-layer.2": 2972.4426, "encoder_q-layer.3": 2845.2324, "encoder_q-layer.4": 2710.8242, "encoder_q-layer.5": 2697.2065, "encoder_q-layer.6": 1937.8391, "encoder_q-layer.7": 1771.8986, "encoder_q-layer.8": 1584.2924, "encoder_q-layer.9": 1201.8438, "epoch": 0.69, "inbatch_neg_score": 0.2629, "inbatch_pos_score": 0.9014, "learning_rate": 1.6277777777777777e-05, "loss": 3.3876, "norm_diff": 0.1091, "norm_loss": 0.0, "num_token_doc": 66.9958, "num_token_overlap": 14.617, "num_token_query": 37.3153, "num_token_union": 65.5006, "num_word_context": 202.5141, "num_word_doc": 49.9644, "num_word_query": 27.9318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3868.2057, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2627, "query_norm": 1.3428, "queue_k_norm": 1.4485, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3153, "sent_len_1": 66.9958, "sent_len_max_0": 127.995, "sent_len_max_1": 187.2812, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.3893, "doc_norm": 1.4473, "encoder_q-embeddings": 2478.8259, "encoder_q-layer.0": 1714.7554, "encoder_q-layer.1": 1759.9315, "encoder_q-layer.10": 1096.7639, "encoder_q-layer.11": 3007.6794, "encoder_q-layer.2": 2099.9072, "encoder_q-layer.3": 2377.8, "encoder_q-layer.4": 2754.7258, "encoder_q-layer.5": 2423.2659, "encoder_q-layer.6": 1733.6102, "encoder_q-layer.7": 1600.052, "encoder_q-layer.8": 1469.9702, "encoder_q-layer.9": 1154.6436, "epoch": 0.69, "inbatch_neg_score": 0.2625, "inbatch_pos_score": 0.9355, "learning_rate": 1.6222222222222223e-05, "loss": 3.3893, "norm_diff": 0.1058, "norm_loss": 0.0, "num_token_doc": 66.9705, "num_token_overlap": 14.6843, "num_token_query": 37.5177, "num_token_union": 65.4948, "num_word_context": 202.547, "num_word_doc": 49.9732, "num_word_query": 28.1162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3081.2933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2629, "query_norm": 1.3415, "queue_k_norm": 1.4485, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5177, "sent_len_1": 66.9705, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9575, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.3996, "doc_norm": 1.4499, "encoder_q-embeddings": 1348.9568, "encoder_q-layer.0": 932.5327, "encoder_q-layer.1": 1064.9802, "encoder_q-layer.10": 1295.3169, "encoder_q-layer.11": 2943.4465, "encoder_q-layer.2": 1221.4464, "encoder_q-layer.3": 1179.3202, "encoder_q-layer.4": 1232.5101, "encoder_q-layer.5": 1227.9768, "encoder_q-layer.6": 1384.244, "encoder_q-layer.7": 1369.3462, "encoder_q-layer.8": 1425.8884, "encoder_q-layer.9": 1285.8456, "epoch": 0.69, "inbatch_neg_score": 0.2629, "inbatch_pos_score": 0.9146, "learning_rate": 1.6166666666666665e-05, "loss": 3.3996, "norm_diff": 0.1228, "norm_loss": 0.0, "num_token_doc": 66.6826, "num_token_overlap": 14.5795, "num_token_query": 37.3723, "num_token_union": 65.3759, "num_word_context": 202.4277, "num_word_doc": 49.7588, "num_word_query": 27.9824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2197.4719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2639, "query_norm": 1.3271, "queue_k_norm": 1.4461, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3723, "sent_len_1": 66.6826, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.6538, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4113, "doc_norm": 1.4495, "encoder_q-embeddings": 1136.3035, "encoder_q-layer.0": 793.1761, "encoder_q-layer.1": 870.0831, "encoder_q-layer.10": 1281.8582, "encoder_q-layer.11": 3151.0645, "encoder_q-layer.2": 1016.9767, "encoder_q-layer.3": 1075.9725, "encoder_q-layer.4": 1103.4323, "encoder_q-layer.5": 1016.3923, "encoder_q-layer.6": 1041.3702, "encoder_q-layer.7": 1102.9094, "encoder_q-layer.8": 1334.2385, "encoder_q-layer.9": 1222.2645, "epoch": 0.69, "inbatch_neg_score": 0.2629, "inbatch_pos_score": 0.9062, "learning_rate": 1.6111111111111115e-05, "loss": 3.4113, "norm_diff": 0.1123, "norm_loss": 0.0, "num_token_doc": 66.729, "num_token_overlap": 14.496, "num_token_query": 37.1189, "num_token_union": 65.2559, "num_word_context": 202.4336, "num_word_doc": 49.7755, "num_word_query": 27.7703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2068.8952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2637, "query_norm": 1.3372, "queue_k_norm": 1.4464, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1189, "sent_len_1": 66.729, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9437, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.4157, "doc_norm": 1.4424, "encoder_q-embeddings": 1052.6904, "encoder_q-layer.0": 708.5949, "encoder_q-layer.1": 738.9149, "encoder_q-layer.10": 1170.653, "encoder_q-layer.11": 3057.6333, "encoder_q-layer.2": 848.6815, "encoder_q-layer.3": 875.0297, "encoder_q-layer.4": 877.0395, "encoder_q-layer.5": 914.5507, "encoder_q-layer.6": 944.9355, "encoder_q-layer.7": 1112.6185, "encoder_q-layer.8": 1238.6611, "encoder_q-layer.9": 1102.3765, "epoch": 0.69, "inbatch_neg_score": 0.262, "inbatch_pos_score": 0.9072, "learning_rate": 1.6055555555555557e-05, "loss": 3.4157, "norm_diff": 0.105, "norm_loss": 0.0, "num_token_doc": 66.7179, "num_token_overlap": 14.6134, "num_token_query": 37.3733, "num_token_union": 65.3153, "num_word_context": 202.0088, "num_word_doc": 49.7992, "num_word_query": 27.9826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1964.471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2612, "query_norm": 1.3374, "queue_k_norm": 1.4451, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3733, "sent_len_1": 66.7179, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4837, "stdk": 0.0485, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.388, "doc_norm": 1.4427, "encoder_q-embeddings": 2685.9294, "encoder_q-layer.0": 1940.0885, "encoder_q-layer.1": 2308.4287, "encoder_q-layer.10": 1150.1865, "encoder_q-layer.11": 2937.1951, "encoder_q-layer.2": 2703.6931, "encoder_q-layer.3": 3010.8816, "encoder_q-layer.4": 2891.2874, "encoder_q-layer.5": 3263.0698, "encoder_q-layer.6": 3181.5793, "encoder_q-layer.7": 2779.0935, "encoder_q-layer.8": 2147.6924, "encoder_q-layer.9": 1206.15, "epoch": 0.7, "inbatch_neg_score": 0.2628, "inbatch_pos_score": 0.9204, "learning_rate": 1.6000000000000003e-05, "loss": 3.388, "norm_diff": 0.0981, "norm_loss": 0.0, "num_token_doc": 66.9624, "num_token_overlap": 14.6064, "num_token_query": 37.2673, "num_token_union": 65.428, "num_word_context": 202.1977, "num_word_doc": 49.9695, "num_word_query": 27.9071, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3867.8016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2617, "query_norm": 1.3447, "queue_k_norm": 1.447, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2673, "sent_len_1": 66.9624, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0575, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3854, "doc_norm": 1.4568, "encoder_q-embeddings": 1037.7537, "encoder_q-layer.0": 691.2194, "encoder_q-layer.1": 722.5621, "encoder_q-layer.10": 1162.9073, "encoder_q-layer.11": 3065.0574, "encoder_q-layer.2": 800.095, "encoder_q-layer.3": 816.9977, "encoder_q-layer.4": 891.7065, "encoder_q-layer.5": 923.6993, "encoder_q-layer.6": 1019.8195, "encoder_q-layer.7": 1198.3895, "encoder_q-layer.8": 1365.0767, "encoder_q-layer.9": 1156.3979, "epoch": 0.7, "inbatch_neg_score": 0.2613, "inbatch_pos_score": 0.9106, "learning_rate": 1.5944444444444445e-05, "loss": 3.3854, "norm_diff": 0.1181, "norm_loss": 0.0, "num_token_doc": 66.8186, "num_token_overlap": 14.5739, "num_token_query": 37.2117, "num_token_union": 65.3804, "num_word_context": 202.4753, "num_word_doc": 49.8852, "num_word_query": 27.8598, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1978.6596, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.3387, "queue_k_norm": 1.4449, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2117, "sent_len_1": 66.8186, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7012, "stdk": 0.0491, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.4, "doc_norm": 1.4505, "encoder_q-embeddings": 1154.9603, "encoder_q-layer.0": 784.5027, "encoder_q-layer.1": 822.2344, "encoder_q-layer.10": 1121.4661, "encoder_q-layer.11": 3000.9597, "encoder_q-layer.2": 952.6641, "encoder_q-layer.3": 956.3581, "encoder_q-layer.4": 969.4894, "encoder_q-layer.5": 943.5975, "encoder_q-layer.6": 1029.4143, "encoder_q-layer.7": 1068.4969, "encoder_q-layer.8": 1230.0439, "encoder_q-layer.9": 1124.3328, "epoch": 0.7, "inbatch_neg_score": 0.2613, "inbatch_pos_score": 0.9043, "learning_rate": 1.588888888888889e-05, "loss": 3.4, "norm_diff": 0.1066, "norm_loss": 0.0, "num_token_doc": 67.0508, "num_token_overlap": 14.6204, "num_token_query": 37.4, "num_token_union": 65.5648, "num_word_context": 202.7443, "num_word_doc": 49.9972, "num_word_query": 27.9818, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1986.1361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.3438, "queue_k_norm": 1.4428, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4, "sent_len_1": 67.0508, "sent_len_max_0": 127.99, "sent_len_max_1": 192.025, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.3858, "doc_norm": 1.4496, "encoder_q-embeddings": 1140.8394, "encoder_q-layer.0": 767.6652, "encoder_q-layer.1": 814.8798, "encoder_q-layer.10": 1149.6876, "encoder_q-layer.11": 2893.7639, "encoder_q-layer.2": 929.0709, "encoder_q-layer.3": 939.7883, "encoder_q-layer.4": 978.7614, "encoder_q-layer.5": 1012.8758, "encoder_q-layer.6": 1097.9019, "encoder_q-layer.7": 1200.5912, "encoder_q-layer.8": 1256.575, "encoder_q-layer.9": 1086.9038, "epoch": 0.7, "inbatch_neg_score": 0.2596, "inbatch_pos_score": 0.8877, "learning_rate": 1.5833333333333333e-05, "loss": 3.3858, "norm_diff": 0.1315, "norm_loss": 0.0, "num_token_doc": 66.8051, "num_token_overlap": 14.5662, "num_token_query": 37.246, "num_token_union": 65.4115, "num_word_context": 202.067, "num_word_doc": 49.8556, "num_word_query": 27.8782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1976.4577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.259, "query_norm": 1.3181, "queue_k_norm": 1.4453, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.246, "sent_len_1": 66.8051, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2287, "stdk": 0.0488, "stdq": 0.0435, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3869, "doc_norm": 1.4518, "encoder_q-embeddings": 1391.3278, "encoder_q-layer.0": 1059.4271, "encoder_q-layer.1": 1161.4458, "encoder_q-layer.10": 1107.8221, "encoder_q-layer.11": 2857.541, "encoder_q-layer.2": 1351.0253, "encoder_q-layer.3": 1480.3553, "encoder_q-layer.4": 1590.4979, "encoder_q-layer.5": 1409.809, "encoder_q-layer.6": 1290.5812, "encoder_q-layer.7": 1270.5469, "encoder_q-layer.8": 1336.9399, "encoder_q-layer.9": 1062.0542, "epoch": 0.7, "inbatch_neg_score": 0.2584, "inbatch_pos_score": 0.9155, "learning_rate": 1.577777777777778e-05, "loss": 3.3869, "norm_diff": 0.1255, "norm_loss": 0.0, "num_token_doc": 66.9227, "num_token_overlap": 14.596, "num_token_query": 37.409, "num_token_union": 65.5157, "num_word_context": 202.2352, "num_word_doc": 49.9156, "num_word_query": 27.9915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2245.3027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2588, "query_norm": 1.3263, "queue_k_norm": 1.4451, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.409, "sent_len_1": 66.9227, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.115, "stdk": 0.049, "stdq": 0.0438, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.395, "doc_norm": 1.4448, "encoder_q-embeddings": 1103.1034, "encoder_q-layer.0": 722.3619, "encoder_q-layer.1": 747.2021, "encoder_q-layer.10": 1072.4899, "encoder_q-layer.11": 2816.855, "encoder_q-layer.2": 844.958, "encoder_q-layer.3": 890.4775, "encoder_q-layer.4": 893.7352, "encoder_q-layer.5": 856.5358, "encoder_q-layer.6": 939.2089, "encoder_q-layer.7": 1043.2068, "encoder_q-layer.8": 1177.363, "encoder_q-layer.9": 1054.7853, "epoch": 0.7, "inbatch_neg_score": 0.2578, "inbatch_pos_score": 0.9287, "learning_rate": 1.5722222222222225e-05, "loss": 3.395, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.9843, "num_token_overlap": 14.5736, "num_token_query": 37.3311, "num_token_union": 65.4974, "num_word_context": 202.4936, "num_word_doc": 49.9869, "num_word_query": 27.9401, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1859.2305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.348, "queue_k_norm": 1.4433, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3311, "sent_len_1": 66.9843, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6637, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3871, "doc_norm": 1.4443, "encoder_q-embeddings": 1154.547, "encoder_q-layer.0": 755.5991, "encoder_q-layer.1": 780.3887, "encoder_q-layer.10": 1231.3519, "encoder_q-layer.11": 2966.2214, "encoder_q-layer.2": 872.1964, "encoder_q-layer.3": 887.092, "encoder_q-layer.4": 962.7593, "encoder_q-layer.5": 997.164, "encoder_q-layer.6": 1088.2318, "encoder_q-layer.7": 1172.0688, "encoder_q-layer.8": 1338.4448, "encoder_q-layer.9": 1181.0741, "epoch": 0.7, "inbatch_neg_score": 0.2628, "inbatch_pos_score": 0.9155, "learning_rate": 1.5666666666666667e-05, "loss": 3.3871, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.7018, "num_token_overlap": 14.5059, "num_token_query": 37.0941, "num_token_union": 65.2651, "num_word_context": 202.2393, "num_word_doc": 49.803, "num_word_query": 27.7973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1994.6396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2629, "query_norm": 1.3331, "queue_k_norm": 1.4441, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0941, "sent_len_1": 66.7018, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2688, "stdk": 0.0487, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.4071, "doc_norm": 1.4379, "encoder_q-embeddings": 1471.6641, "encoder_q-layer.0": 1018.4708, "encoder_q-layer.1": 1178.4062, "encoder_q-layer.10": 1328.0745, "encoder_q-layer.11": 3040.7263, "encoder_q-layer.2": 1390.0699, "encoder_q-layer.3": 1445.2263, "encoder_q-layer.4": 1442.8901, "encoder_q-layer.5": 1495.8915, "encoder_q-layer.6": 1600.1619, "encoder_q-layer.7": 1668.2462, "encoder_q-layer.8": 1661.8633, "encoder_q-layer.9": 1329.6396, "epoch": 0.7, "inbatch_neg_score": 0.2653, "inbatch_pos_score": 0.8979, "learning_rate": 1.5611111111111113e-05, "loss": 3.4071, "norm_diff": 0.0693, "norm_loss": 0.0, "num_token_doc": 66.6807, "num_token_overlap": 14.5252, "num_token_query": 37.1459, "num_token_union": 65.2381, "num_word_context": 201.9573, "num_word_doc": 49.7336, "num_word_query": 27.8047, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2410.2133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.3686, "queue_k_norm": 1.4436, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1459, "sent_len_1": 66.6807, "sent_len_max_0": 128.0, "sent_len_max_1": 192.1887, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3778, "doc_norm": 1.443, "encoder_q-embeddings": 1060.0188, "encoder_q-layer.0": 720.2218, "encoder_q-layer.1": 770.501, "encoder_q-layer.10": 1348.5872, "encoder_q-layer.11": 3014.0659, "encoder_q-layer.2": 865.1182, "encoder_q-layer.3": 888.7444, "encoder_q-layer.4": 947.554, "encoder_q-layer.5": 1030.2479, "encoder_q-layer.6": 1073.3607, "encoder_q-layer.7": 1129.6742, "encoder_q-layer.8": 1358.4248, "encoder_q-layer.9": 1272.9222, "epoch": 0.7, "inbatch_neg_score": 0.2702, "inbatch_pos_score": 0.9258, "learning_rate": 1.5555555555555555e-05, "loss": 3.3778, "norm_diff": 0.079, "norm_loss": 0.0, "num_token_doc": 66.6752, "num_token_overlap": 14.5897, "num_token_query": 37.3155, "num_token_union": 65.3257, "num_word_context": 202.2403, "num_word_doc": 49.7941, "num_word_query": 27.9619, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1971.8384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2703, "query_norm": 1.364, "queue_k_norm": 1.4464, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3155, "sent_len_1": 66.6752, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7237, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3902, "doc_norm": 1.4471, "encoder_q-embeddings": 1895.2705, "encoder_q-layer.0": 1428.275, "encoder_q-layer.1": 1543.8287, "encoder_q-layer.10": 1132.4253, "encoder_q-layer.11": 3122.0552, "encoder_q-layer.2": 1775.5333, "encoder_q-layer.3": 1700.0557, "encoder_q-layer.4": 1877.2804, "encoder_q-layer.5": 1861.5071, "encoder_q-layer.6": 1690.3684, "encoder_q-layer.7": 1509.3987, "encoder_q-layer.8": 1527.6163, "encoder_q-layer.9": 1166.6973, "epoch": 0.7, "inbatch_neg_score": 0.2742, "inbatch_pos_score": 0.9258, "learning_rate": 1.55e-05, "loss": 3.3902, "norm_diff": 0.0962, "norm_loss": 0.0, "num_token_doc": 66.8483, "num_token_overlap": 14.5715, "num_token_query": 37.4621, "num_token_union": 65.5642, "num_word_context": 202.6971, "num_word_doc": 49.9356, "num_word_query": 28.0815, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2752.4212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2732, "query_norm": 1.3509, "queue_k_norm": 1.4456, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4621, "sent_len_1": 66.8483, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.0225, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.4005, "doc_norm": 1.4477, "encoder_q-embeddings": 1027.052, "encoder_q-layer.0": 696.3136, "encoder_q-layer.1": 705.7413, "encoder_q-layer.10": 1114.637, "encoder_q-layer.11": 2959.7007, "encoder_q-layer.2": 778.9238, "encoder_q-layer.3": 791.2771, "encoder_q-layer.4": 851.5361, "encoder_q-layer.5": 840.3441, "encoder_q-layer.6": 945.5948, "encoder_q-layer.7": 1052.6526, "encoder_q-layer.8": 1221.1633, "encoder_q-layer.9": 1109.5189, "epoch": 0.7, "inbatch_neg_score": 0.2742, "inbatch_pos_score": 0.9106, "learning_rate": 1.5444444444444446e-05, "loss": 3.4005, "norm_diff": 0.1073, "norm_loss": 0.0, "num_token_doc": 66.5554, "num_token_overlap": 14.5805, "num_token_query": 37.2661, "num_token_union": 65.2174, "num_word_context": 202.4736, "num_word_doc": 49.674, "num_word_query": 27.9073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1907.7006, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2734, "query_norm": 1.3404, "queue_k_norm": 1.4468, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2661, "sent_len_1": 66.5554, "sent_len_max_0": 127.995, "sent_len_max_1": 187.8487, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.3913, "doc_norm": 1.4445, "encoder_q-embeddings": 2085.0417, "encoder_q-layer.0": 1410.9553, "encoder_q-layer.1": 1492.5825, "encoder_q-layer.10": 2411.0867, "encoder_q-layer.11": 6051.2534, "encoder_q-layer.2": 1659.1115, "encoder_q-layer.3": 1764.0247, "encoder_q-layer.4": 1805.6523, "encoder_q-layer.5": 1715.7633, "encoder_q-layer.6": 1971.662, "encoder_q-layer.7": 2193.8428, "encoder_q-layer.8": 2768.3022, "encoder_q-layer.9": 2329.947, "epoch": 0.71, "inbatch_neg_score": 0.2792, "inbatch_pos_score": 0.9072, "learning_rate": 1.538888888888889e-05, "loss": 3.3913, "norm_diff": 0.0975, "norm_loss": 0.0, "num_token_doc": 66.6235, "num_token_overlap": 14.5394, "num_token_query": 37.2751, "num_token_union": 65.2545, "num_word_context": 201.9954, "num_word_doc": 49.7543, "num_word_query": 27.9361, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3939.6893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2783, "query_norm": 1.3471, "queue_k_norm": 1.4442, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2751, "sent_len_1": 66.6235, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6362, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3905, "doc_norm": 1.4499, "encoder_q-embeddings": 4038.6414, "encoder_q-layer.0": 2832.53, "encoder_q-layer.1": 3410.8435, "encoder_q-layer.10": 2264.1748, "encoder_q-layer.11": 6148.4849, "encoder_q-layer.2": 4128.9038, "encoder_q-layer.3": 4599.0884, "encoder_q-layer.4": 4763.4097, "encoder_q-layer.5": 4714.2812, "encoder_q-layer.6": 4929.187, "encoder_q-layer.7": 4564.0098, "encoder_q-layer.8": 3562.6367, "encoder_q-layer.9": 2339.447, "epoch": 0.71, "inbatch_neg_score": 0.2778, "inbatch_pos_score": 0.9219, "learning_rate": 1.5333333333333334e-05, "loss": 3.3905, "norm_diff": 0.0954, "norm_loss": 0.0, "num_token_doc": 66.9327, "num_token_overlap": 14.5753, "num_token_query": 37.3664, "num_token_union": 65.4741, "num_word_context": 202.1975, "num_word_doc": 49.8674, "num_word_query": 27.9606, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6248.1057, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2783, "query_norm": 1.3545, "queue_k_norm": 1.4457, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3664, "sent_len_1": 66.9327, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4462, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.3863, "doc_norm": 1.4424, "encoder_q-embeddings": 3019.1602, "encoder_q-layer.0": 2009.6147, "encoder_q-layer.1": 2126.0142, "encoder_q-layer.10": 2772.6191, "encoder_q-layer.11": 6270.5742, "encoder_q-layer.2": 2592.5972, "encoder_q-layer.3": 2692.0237, "encoder_q-layer.4": 2804.428, "encoder_q-layer.5": 2920.9324, "encoder_q-layer.6": 2849.3806, "encoder_q-layer.7": 2877.0679, "encoder_q-layer.8": 2881.0681, "encoder_q-layer.9": 2394.8479, "epoch": 0.71, "inbatch_neg_score": 0.2808, "inbatch_pos_score": 0.9028, "learning_rate": 1.527777777777778e-05, "loss": 3.3863, "norm_diff": 0.1019, "norm_loss": 0.0, "num_token_doc": 66.8483, "num_token_overlap": 14.6113, "num_token_query": 37.3379, "num_token_union": 65.3877, "num_word_context": 202.1915, "num_word_doc": 49.8622, "num_word_query": 27.9525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4775.3079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2815, "query_norm": 1.3405, "queue_k_norm": 1.4481, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3379, "sent_len_1": 66.8483, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.9087, "stdk": 0.0486, "stdq": 0.0437, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3786, "doc_norm": 1.4473, "encoder_q-embeddings": 2359.7036, "encoder_q-layer.0": 1636.2125, "encoder_q-layer.1": 1826.0814, "encoder_q-layer.10": 2137.4097, "encoder_q-layer.11": 6002.6284, "encoder_q-layer.2": 2057.5112, "encoder_q-layer.3": 2026.5372, "encoder_q-layer.4": 2241.6443, "encoder_q-layer.5": 2146.5969, "encoder_q-layer.6": 2194.4536, "encoder_q-layer.7": 2352.9417, "encoder_q-layer.8": 2539.6226, "encoder_q-layer.9": 2211.1536, "epoch": 0.71, "inbatch_neg_score": 0.2871, "inbatch_pos_score": 0.9531, "learning_rate": 1.5222222222222224e-05, "loss": 3.3786, "norm_diff": 0.0813, "norm_loss": 0.0, "num_token_doc": 66.943, "num_token_overlap": 14.6535, "num_token_query": 37.3348, "num_token_union": 65.4044, "num_word_context": 202.3218, "num_word_doc": 49.9489, "num_word_query": 27.9631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4106.4853, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.366, "queue_k_norm": 1.4485, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3348, "sent_len_1": 66.943, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.8862, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.39, "doc_norm": 1.4522, "encoder_q-embeddings": 3235.8179, "encoder_q-layer.0": 2068.7634, "encoder_q-layer.1": 2403.0276, "encoder_q-layer.10": 2439.6133, "encoder_q-layer.11": 6242.9937, "encoder_q-layer.2": 2694.1567, "encoder_q-layer.3": 2962.4939, "encoder_q-layer.4": 2803.6941, "encoder_q-layer.5": 2782.071, "encoder_q-layer.6": 2662.425, "encoder_q-layer.7": 2595.4229, "encoder_q-layer.8": 2730.9224, "encoder_q-layer.9": 2473.0176, "epoch": 0.71, "inbatch_neg_score": 0.2819, "inbatch_pos_score": 0.9272, "learning_rate": 1.5166666666666668e-05, "loss": 3.39, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.8772, "num_token_overlap": 14.6339, "num_token_query": 37.4025, "num_token_union": 65.424, "num_word_context": 202.255, "num_word_doc": 49.9228, "num_word_query": 27.9945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4696.5081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.282, "query_norm": 1.3362, "queue_k_norm": 1.4474, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4025, "sent_len_1": 66.8772, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5788, "stdk": 0.0488, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.382, "doc_norm": 1.4487, "encoder_q-embeddings": 2427.7744, "encoder_q-layer.0": 1716.8798, "encoder_q-layer.1": 1791.3436, "encoder_q-layer.10": 2252.0833, "encoder_q-layer.11": 5952.6265, "encoder_q-layer.2": 2026.3052, "encoder_q-layer.3": 2154.5452, "encoder_q-layer.4": 2380.3831, "encoder_q-layer.5": 2317.7734, "encoder_q-layer.6": 2369.8865, "encoder_q-layer.7": 2176.8157, "encoder_q-layer.8": 2437.8374, "encoder_q-layer.9": 2231.2759, "epoch": 0.71, "inbatch_neg_score": 0.2856, "inbatch_pos_score": 0.9492, "learning_rate": 1.5111111111111112e-05, "loss": 3.382, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.5499, "num_token_overlap": 14.5655, "num_token_query": 37.3067, "num_token_union": 65.2792, "num_word_context": 202.4395, "num_word_doc": 49.6843, "num_word_query": 27.9209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4175.1502, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3518, "queue_k_norm": 1.4485, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3067, "sent_len_1": 66.5499, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2125, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.4105, "doc_norm": 1.4503, "encoder_q-embeddings": 3927.9978, "encoder_q-layer.0": 2689.4695, "encoder_q-layer.1": 3079.7932, "encoder_q-layer.10": 2414.9397, "encoder_q-layer.11": 6222.1245, "encoder_q-layer.2": 3706.5962, "encoder_q-layer.3": 4162.9766, "encoder_q-layer.4": 3706.614, "encoder_q-layer.5": 3073.7983, "encoder_q-layer.6": 2657.4946, "encoder_q-layer.7": 2605.5085, "encoder_q-layer.8": 2815.3745, "encoder_q-layer.9": 2427.3242, "epoch": 0.71, "inbatch_neg_score": 0.2861, "inbatch_pos_score": 0.9385, "learning_rate": 1.5055555555555556e-05, "loss": 3.4105, "norm_diff": 0.0994, "norm_loss": 0.0, "num_token_doc": 66.7167, "num_token_overlap": 14.5143, "num_token_query": 37.304, "num_token_union": 65.39, "num_word_context": 202.217, "num_word_doc": 49.7897, "num_word_query": 27.9038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5313.6072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2866, "query_norm": 1.351, "queue_k_norm": 1.4498, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.304, "sent_len_1": 66.7167, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8187, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3883, "doc_norm": 1.4485, "encoder_q-embeddings": 4678.0288, "encoder_q-layer.0": 3362.5437, "encoder_q-layer.1": 3895.9646, "encoder_q-layer.10": 2287.2466, "encoder_q-layer.11": 6221.4316, "encoder_q-layer.2": 4880.8633, "encoder_q-layer.3": 5147.5352, "encoder_q-layer.4": 5878.4004, "encoder_q-layer.5": 5380.1772, "encoder_q-layer.6": 4149.2832, "encoder_q-layer.7": 3899.1582, "encoder_q-layer.8": 3321.6838, "encoder_q-layer.9": 2262.8364, "epoch": 0.71, "inbatch_neg_score": 0.288, "inbatch_pos_score": 0.9224, "learning_rate": 1.5e-05, "loss": 3.3883, "norm_diff": 0.1153, "norm_loss": 0.0, "num_token_doc": 66.9024, "num_token_overlap": 14.6373, "num_token_query": 37.5105, "num_token_union": 65.4781, "num_word_context": 202.4004, "num_word_doc": 49.9171, "num_word_query": 28.0884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6669.6755, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2869, "query_norm": 1.3331, "queue_k_norm": 1.4507, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5105, "sent_len_1": 66.9024, "sent_len_max_0": 128.0, "sent_len_max_1": 189.97, "stdk": 0.0486, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3923, "doc_norm": 1.4485, "encoder_q-embeddings": 17844.7578, "encoder_q-layer.0": 12306.957, "encoder_q-layer.1": 15410.5693, "encoder_q-layer.10": 2620.2856, "encoder_q-layer.11": 6384.1924, "encoder_q-layer.2": 17702.1172, "encoder_q-layer.3": 17644.8398, "encoder_q-layer.4": 18483.4375, "encoder_q-layer.5": 16044.2002, "encoder_q-layer.6": 15761.9062, "encoder_q-layer.7": 15510.6494, "encoder_q-layer.8": 11623.6592, "encoder_q-layer.9": 3322.9424, "epoch": 0.71, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 0.9253, "learning_rate": 1.4944444444444444e-05, "loss": 3.3923, "norm_diff": 0.0951, "norm_loss": 0.0, "num_token_doc": 66.6779, "num_token_overlap": 14.5911, "num_token_query": 37.3108, "num_token_union": 65.3159, "num_word_context": 201.8658, "num_word_doc": 49.7353, "num_word_query": 27.947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22123.6719, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2908, "query_norm": 1.3534, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3108, "sent_len_1": 66.6779, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3925, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3823, "doc_norm": 1.4451, "encoder_q-embeddings": 2337.7441, "encoder_q-layer.0": 1568.7407, "encoder_q-layer.1": 1683.6194, "encoder_q-layer.10": 2420.5112, "encoder_q-layer.11": 6306.9209, "encoder_q-layer.2": 1900.264, "encoder_q-layer.3": 2021.2272, "encoder_q-layer.4": 2102.6152, "encoder_q-layer.5": 2072.7095, "encoder_q-layer.6": 2161.8716, "encoder_q-layer.7": 2420.8823, "encoder_q-layer.8": 2715.6367, "encoder_q-layer.9": 2312.8613, "epoch": 0.71, "inbatch_neg_score": 0.2873, "inbatch_pos_score": 0.9517, "learning_rate": 1.4888888888888888e-05, "loss": 3.3823, "norm_diff": 0.0874, "norm_loss": 0.0, "num_token_doc": 66.8711, "num_token_overlap": 14.5911, "num_token_query": 37.5035, "num_token_union": 65.5387, "num_word_context": 202.309, "num_word_doc": 49.93, "num_word_query": 28.0868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4194.1995, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2866, "query_norm": 1.3577, "queue_k_norm": 1.4514, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5035, "sent_len_1": 66.8711, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1075, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.371, "doc_norm": 1.4541, "encoder_q-embeddings": 3210.4536, "encoder_q-layer.0": 2292.6543, "encoder_q-layer.1": 2401.9497, "encoder_q-layer.10": 2502.4524, "encoder_q-layer.11": 6048.6758, "encoder_q-layer.2": 2674.1453, "encoder_q-layer.3": 2697.98, "encoder_q-layer.4": 2662.6436, "encoder_q-layer.5": 2885.9966, "encoder_q-layer.6": 2807.446, "encoder_q-layer.7": 2999.8235, "encoder_q-layer.8": 2804.4343, "encoder_q-layer.9": 2219.6772, "epoch": 0.72, "inbatch_neg_score": 0.2842, "inbatch_pos_score": 0.9497, "learning_rate": 1.4833333333333336e-05, "loss": 3.371, "norm_diff": 0.096, "norm_loss": 0.0, "num_token_doc": 66.8349, "num_token_overlap": 14.6434, "num_token_query": 37.3664, "num_token_union": 65.3663, "num_word_context": 202.3566, "num_word_doc": 49.8777, "num_word_query": 27.9555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4671.4519, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.3581, "queue_k_norm": 1.4521, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3664, "sent_len_1": 66.8349, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.5275, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.4078, "doc_norm": 1.4547, "encoder_q-embeddings": 2265.4944, "encoder_q-layer.0": 1513.6656, "encoder_q-layer.1": 1629.4498, "encoder_q-layer.10": 2548.5925, "encoder_q-layer.11": 6011.4639, "encoder_q-layer.2": 1834.9052, "encoder_q-layer.3": 1974.0292, "encoder_q-layer.4": 2103.6165, "encoder_q-layer.5": 2091.0022, "encoder_q-layer.6": 2278.6941, "encoder_q-layer.7": 2369.7683, "encoder_q-layer.8": 2627.9397, "encoder_q-layer.9": 2307.4873, "epoch": 0.72, "inbatch_neg_score": 0.283, "inbatch_pos_score": 0.9409, "learning_rate": 1.477777777777778e-05, "loss": 3.4078, "norm_diff": 0.1123, "norm_loss": 0.0, "num_token_doc": 66.8452, "num_token_overlap": 14.5479, "num_token_query": 37.1911, "num_token_union": 65.3507, "num_word_context": 202.3553, "num_word_doc": 49.8542, "num_word_query": 27.8115, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4072.8699, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.3424, "queue_k_norm": 1.4512, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1911, "sent_len_1": 66.8452, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9975, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4115, "doc_norm": 1.4518, "encoder_q-embeddings": 1918.6542, "encoder_q-layer.0": 1291.8851, "encoder_q-layer.1": 1359.6978, "encoder_q-layer.10": 2182.9351, "encoder_q-layer.11": 6218.4346, "encoder_q-layer.2": 1483.4993, "encoder_q-layer.3": 1485.2151, "encoder_q-layer.4": 1527.2562, "encoder_q-layer.5": 1580.7136, "encoder_q-layer.6": 1773.8486, "encoder_q-layer.7": 1962.4467, "encoder_q-layer.8": 2352.1763, "encoder_q-layer.9": 2166.5454, "epoch": 0.72, "inbatch_neg_score": 0.2807, "inbatch_pos_score": 0.9248, "learning_rate": 1.4722222222222224e-05, "loss": 3.4115, "norm_diff": 0.126, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 14.533, "num_token_query": 37.3277, "num_token_union": 65.3448, "num_word_context": 202.3176, "num_word_doc": 49.7184, "num_word_query": 27.94, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3776.0949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2812, "query_norm": 1.3258, "queue_k_norm": 1.4517, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3277, "sent_len_1": 66.6695, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.5213, "stdk": 0.0487, "stdq": 0.0435, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.4018, "doc_norm": 1.4534, "encoder_q-embeddings": 2838.0898, "encoder_q-layer.0": 1897.13, "encoder_q-layer.1": 2153.7048, "encoder_q-layer.10": 2245.7861, "encoder_q-layer.11": 5891.104, "encoder_q-layer.2": 2403.9768, "encoder_q-layer.3": 2530.8533, "encoder_q-layer.4": 2653.8264, "encoder_q-layer.5": 2707.1697, "encoder_q-layer.6": 2702.3401, "encoder_q-layer.7": 2621.7944, "encoder_q-layer.8": 2574.2271, "encoder_q-layer.9": 2231.3005, "epoch": 0.72, "inbatch_neg_score": 0.281, "inbatch_pos_score": 0.9341, "learning_rate": 1.4666666666666668e-05, "loss": 3.4018, "norm_diff": 0.1198, "norm_loss": 0.0, "num_token_doc": 66.9033, "num_token_overlap": 14.6027, "num_token_query": 37.2732, "num_token_union": 65.409, "num_word_context": 202.2941, "num_word_doc": 49.9403, "num_word_query": 27.8728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4431.7712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.281, "query_norm": 1.3337, "queue_k_norm": 1.4519, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2732, "sent_len_1": 66.9033, "sent_len_max_0": 128.0, "sent_len_max_1": 188.275, "stdk": 0.0488, "stdq": 0.0438, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3659, "doc_norm": 1.4523, "encoder_q-embeddings": 2508.4163, "encoder_q-layer.0": 1696.2013, "encoder_q-layer.1": 1875.1458, "encoder_q-layer.10": 2371.6475, "encoder_q-layer.11": 6140.4258, "encoder_q-layer.2": 2110.3679, "encoder_q-layer.3": 2185.1072, "encoder_q-layer.4": 2466.4822, "encoder_q-layer.5": 2660.512, "encoder_q-layer.6": 2450.7532, "encoder_q-layer.7": 2372.8247, "encoder_q-layer.8": 2603.6741, "encoder_q-layer.9": 2356.3516, "epoch": 0.72, "inbatch_neg_score": 0.2813, "inbatch_pos_score": 0.9292, "learning_rate": 1.4611111111111112e-05, "loss": 3.3659, "norm_diff": 0.0935, "norm_loss": 0.0, "num_token_doc": 66.7144, "num_token_overlap": 14.6014, "num_token_query": 37.3014, "num_token_union": 65.3853, "num_word_context": 202.3807, "num_word_doc": 49.7964, "num_word_query": 27.9351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4288.021, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.3588, "queue_k_norm": 1.4525, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3014, "sent_len_1": 66.7144, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.6962, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3938, "doc_norm": 1.4555, "encoder_q-embeddings": 3395.698, "encoder_q-layer.0": 2472.3777, "encoder_q-layer.1": 2597.865, "encoder_q-layer.10": 2429.0132, "encoder_q-layer.11": 5976.7388, "encoder_q-layer.2": 3022.3633, "encoder_q-layer.3": 3282.4265, "encoder_q-layer.4": 3412.2803, "encoder_q-layer.5": 3794.2654, "encoder_q-layer.6": 3729.1897, "encoder_q-layer.7": 3023.1565, "encoder_q-layer.8": 2874.1143, "encoder_q-layer.9": 2361.406, "epoch": 0.72, "inbatch_neg_score": 0.2791, "inbatch_pos_score": 0.9448, "learning_rate": 1.4555555555555556e-05, "loss": 3.3938, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.3163, "num_token_overlap": 14.533, "num_token_query": 37.2836, "num_token_union": 65.1419, "num_word_context": 201.7166, "num_word_doc": 49.4936, "num_word_query": 27.9253, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5111.3378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2793, "query_norm": 1.3458, "queue_k_norm": 1.4492, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2836, "sent_len_1": 66.3163, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2525, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3864, "doc_norm": 1.4485, "encoder_q-embeddings": 2246.3315, "encoder_q-layer.0": 1566.0284, "encoder_q-layer.1": 1639.7949, "encoder_q-layer.10": 2666.0833, "encoder_q-layer.11": 6569.7754, "encoder_q-layer.2": 1882.5013, "encoder_q-layer.3": 1896.6632, "encoder_q-layer.4": 1978.7456, "encoder_q-layer.5": 1992.8771, "encoder_q-layer.6": 2104.9656, "encoder_q-layer.7": 2331.2466, "encoder_q-layer.8": 2646.2214, "encoder_q-layer.9": 2344.8047, "epoch": 0.72, "inbatch_neg_score": 0.2773, "inbatch_pos_score": 0.9238, "learning_rate": 1.45e-05, "loss": 3.3864, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.5552, "num_token_overlap": 14.5857, "num_token_query": 37.3844, "num_token_union": 65.2626, "num_word_context": 202.2147, "num_word_doc": 49.6596, "num_word_query": 28.0011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4237.4599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2776, "query_norm": 1.3434, "queue_k_norm": 1.4512, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3844, "sent_len_1": 66.5552, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.57, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3886, "doc_norm": 1.4591, "encoder_q-embeddings": 2402.3657, "encoder_q-layer.0": 1617.6713, "encoder_q-layer.1": 1698.9624, "encoder_q-layer.10": 2335.6206, "encoder_q-layer.11": 6051.0396, "encoder_q-layer.2": 1943.7991, "encoder_q-layer.3": 1961.9958, "encoder_q-layer.4": 2030.3715, "encoder_q-layer.5": 2006.2125, "encoder_q-layer.6": 2257.3135, "encoder_q-layer.7": 2318.6804, "encoder_q-layer.8": 2532.5376, "encoder_q-layer.9": 2276.1172, "epoch": 0.72, "inbatch_neg_score": 0.2753, "inbatch_pos_score": 0.9121, "learning_rate": 1.4444444444444444e-05, "loss": 3.3886, "norm_diff": 0.1165, "norm_loss": 0.0, "num_token_doc": 66.6909, "num_token_overlap": 14.5861, "num_token_query": 37.2661, "num_token_union": 65.3354, "num_word_context": 201.8732, "num_word_doc": 49.7785, "num_word_query": 27.9084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4110.1455, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2771, "query_norm": 1.3426, "queue_k_norm": 1.4507, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2661, "sent_len_1": 66.6909, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0938, "stdk": 0.049, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3848, "doc_norm": 1.4546, "encoder_q-embeddings": 4217.7036, "encoder_q-layer.0": 2905.7292, "encoder_q-layer.1": 3183.4377, "encoder_q-layer.10": 2243.6433, "encoder_q-layer.11": 5870.9204, "encoder_q-layer.2": 4143.7476, "encoder_q-layer.3": 4094.4016, "encoder_q-layer.4": 4197.4087, "encoder_q-layer.5": 3466.7158, "encoder_q-layer.6": 3024.2332, "encoder_q-layer.7": 3463.5972, "encoder_q-layer.8": 2883.1335, "encoder_q-layer.9": 2191.8542, "epoch": 0.72, "inbatch_neg_score": 0.2765, "inbatch_pos_score": 0.9219, "learning_rate": 1.438888888888889e-05, "loss": 3.3848, "norm_diff": 0.1193, "norm_loss": 0.0, "num_token_doc": 66.6186, "num_token_overlap": 14.5263, "num_token_query": 37.3207, "num_token_union": 65.306, "num_word_context": 201.9443, "num_word_doc": 49.7153, "num_word_query": 27.9473, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5574.7164, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2769, "query_norm": 1.3354, "queue_k_norm": 1.4522, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3207, "sent_len_1": 66.6186, "sent_len_max_0": 127.995, "sent_len_max_1": 190.8825, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3846, "doc_norm": 1.4521, "encoder_q-embeddings": 2147.1335, "encoder_q-layer.0": 1440.3976, "encoder_q-layer.1": 1516.9923, "encoder_q-layer.10": 2608.6462, "encoder_q-layer.11": 6556.624, "encoder_q-layer.2": 1778.5144, "encoder_q-layer.3": 1836.3665, "encoder_q-layer.4": 1871.6139, "encoder_q-layer.5": 1842.0979, "encoder_q-layer.6": 1939.7063, "encoder_q-layer.7": 2135.1936, "encoder_q-layer.8": 2548.1672, "encoder_q-layer.9": 2468.1475, "epoch": 0.72, "inbatch_neg_score": 0.2735, "inbatch_pos_score": 0.9087, "learning_rate": 1.4333333333333334e-05, "loss": 3.3846, "norm_diff": 0.1271, "norm_loss": 0.0, "num_token_doc": 66.9385, "num_token_overlap": 14.5674, "num_token_query": 37.4803, "num_token_union": 65.5789, "num_word_context": 202.5979, "num_word_doc": 49.9391, "num_word_query": 28.0667, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4016.0931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2732, "query_norm": 1.325, "queue_k_norm": 1.4516, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4803, "sent_len_1": 66.9385, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7075, "stdk": 0.0487, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.4112, "doc_norm": 1.4492, "encoder_q-embeddings": 4130.1567, "encoder_q-layer.0": 2829.8362, "encoder_q-layer.1": 2977.8892, "encoder_q-layer.10": 4629.0781, "encoder_q-layer.11": 12087.5205, "encoder_q-layer.2": 3312.0227, "encoder_q-layer.3": 3401.5032, "encoder_q-layer.4": 3586.0232, "encoder_q-layer.5": 3624.3232, "encoder_q-layer.6": 3908.7305, "encoder_q-layer.7": 4237.1885, "encoder_q-layer.8": 5168.522, "encoder_q-layer.9": 4657.2075, "epoch": 0.73, "inbatch_neg_score": 0.2733, "inbatch_pos_score": 0.8945, "learning_rate": 1.427777777777778e-05, "loss": 3.4112, "norm_diff": 0.1278, "norm_loss": 0.0, "num_token_doc": 66.9626, "num_token_overlap": 14.534, "num_token_query": 37.2065, "num_token_union": 65.4474, "num_word_context": 202.5149, "num_word_doc": 49.9788, "num_word_query": 27.8591, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7907.4235, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2739, "query_norm": 1.3214, "queue_k_norm": 1.4485, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2065, "sent_len_1": 66.9626, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9363, "stdk": 0.0486, "stdq": 0.0433, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.4109, "doc_norm": 1.456, "encoder_q-embeddings": 6272.2754, "encoder_q-layer.0": 4173.5195, "encoder_q-layer.1": 4882.748, "encoder_q-layer.10": 4278.5088, "encoder_q-layer.11": 11465.9707, "encoder_q-layer.2": 5520.5908, "encoder_q-layer.3": 5643.6201, "encoder_q-layer.4": 6377.8701, "encoder_q-layer.5": 6438.144, "encoder_q-layer.6": 5875.7065, "encoder_q-layer.7": 5807.4868, "encoder_q-layer.8": 5588.0078, "encoder_q-layer.9": 4348.0371, "epoch": 0.73, "inbatch_neg_score": 0.2712, "inbatch_pos_score": 0.9336, "learning_rate": 1.4222222222222224e-05, "loss": 3.4109, "norm_diff": 0.108, "norm_loss": 0.0, "num_token_doc": 66.6651, "num_token_overlap": 14.5741, "num_token_query": 37.1971, "num_token_union": 65.249, "num_word_context": 202.282, "num_word_doc": 49.719, "num_word_query": 27.858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9388.885, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2715, "query_norm": 1.3481, "queue_k_norm": 1.4506, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1971, "sent_len_1": 66.6651, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5075, "stdk": 0.0489, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.3889, "doc_norm": 1.452, "encoder_q-embeddings": 46148.0586, "encoder_q-layer.0": 31982.375, "encoder_q-layer.1": 34477.5898, "encoder_q-layer.10": 4576.7549, "encoder_q-layer.11": 12977.2998, "encoder_q-layer.2": 41513.0156, "encoder_q-layer.3": 46344.5898, "encoder_q-layer.4": 43447.9375, "encoder_q-layer.5": 42979.125, "encoder_q-layer.6": 26784.2988, "encoder_q-layer.7": 25136.2891, "encoder_q-layer.8": 15498.6211, "encoder_q-layer.9": 5378.2397, "epoch": 0.73, "inbatch_neg_score": 0.2737, "inbatch_pos_score": 0.9014, "learning_rate": 1.4166666666666668e-05, "loss": 3.3889, "norm_diff": 0.1241, "norm_loss": 0.0, "num_token_doc": 66.6742, "num_token_overlap": 14.5777, "num_token_query": 37.2937, "num_token_union": 65.261, "num_word_context": 201.9881, "num_word_doc": 49.738, "num_word_query": 27.9033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 49933.5199, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.2749, "query_norm": 1.3279, "queue_k_norm": 1.4492, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2937, "sent_len_1": 66.6742, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2887, "stdk": 0.0488, "stdq": 0.0435, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3857, "doc_norm": 1.4538, "encoder_q-embeddings": 11132.8711, "encoder_q-layer.0": 8227.082, "encoder_q-layer.1": 9428.1738, "encoder_q-layer.10": 4650.7129, "encoder_q-layer.11": 11685.2275, "encoder_q-layer.2": 11403.4609, "encoder_q-layer.3": 11745.418, "encoder_q-layer.4": 12132.9775, "encoder_q-layer.5": 14117.9697, "encoder_q-layer.6": 15647.957, "encoder_q-layer.7": 14891.7549, "encoder_q-layer.8": 10693.0869, "encoder_q-layer.9": 5036.5596, "epoch": 0.73, "inbatch_neg_score": 0.2767, "inbatch_pos_score": 0.9346, "learning_rate": 1.4111111111111112e-05, "loss": 3.3857, "norm_diff": 0.1014, "norm_loss": 0.0, "num_token_doc": 66.7987, "num_token_overlap": 14.5283, "num_token_query": 37.3563, "num_token_union": 65.4588, "num_word_context": 202.5511, "num_word_doc": 49.83, "num_word_query": 27.9656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17112.804, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2756, "query_norm": 1.3525, "queue_k_norm": 1.4501, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3563, "sent_len_1": 66.7987, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9925, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3733, "doc_norm": 1.4452, "encoder_q-embeddings": 5950.5962, "encoder_q-layer.0": 4265.1069, "encoder_q-layer.1": 4678.1362, "encoder_q-layer.10": 4524.3804, "encoder_q-layer.11": 11798.8906, "encoder_q-layer.2": 5303.7017, "encoder_q-layer.3": 5880.0991, "encoder_q-layer.4": 6137.52, "encoder_q-layer.5": 6518.3179, "encoder_q-layer.6": 6911.2437, "encoder_q-layer.7": 6758.9512, "encoder_q-layer.8": 6150.3516, "encoder_q-layer.9": 4690.1455, "epoch": 0.73, "inbatch_neg_score": 0.2757, "inbatch_pos_score": 0.9414, "learning_rate": 1.4055555555555556e-05, "loss": 3.3733, "norm_diff": 0.074, "norm_loss": 0.0, "num_token_doc": 66.6873, "num_token_overlap": 14.5828, "num_token_query": 37.4626, "num_token_union": 65.3775, "num_word_context": 202.0195, "num_word_doc": 49.7473, "num_word_query": 28.0303, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9651.185, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2754, "query_norm": 1.3711, "queue_k_norm": 1.4481, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4626, "sent_len_1": 66.6873, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9837, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3961, "doc_norm": 1.4532, "encoder_q-embeddings": 19229.4668, "encoder_q-layer.0": 13025.6533, "encoder_q-layer.1": 14784.3105, "encoder_q-layer.10": 4747.7646, "encoder_q-layer.11": 12331.8418, "encoder_q-layer.2": 16782.0098, "encoder_q-layer.3": 18248.6543, "encoder_q-layer.4": 21334.082, "encoder_q-layer.5": 22221.4336, "encoder_q-layer.6": 20037.0625, "encoder_q-layer.7": 15066.998, "encoder_q-layer.8": 7412.6768, "encoder_q-layer.9": 4735.8081, "epoch": 0.73, "inbatch_neg_score": 0.2771, "inbatch_pos_score": 0.9365, "learning_rate": 1.4000000000000001e-05, "loss": 3.3961, "norm_diff": 0.0994, "norm_loss": 0.0, "num_token_doc": 66.6446, "num_token_overlap": 14.5226, "num_token_query": 37.2574, "num_token_union": 65.3914, "num_word_context": 202.1136, "num_word_doc": 49.7237, "num_word_query": 27.9029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 23630.0329, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2773, "query_norm": 1.3538, "queue_k_norm": 1.4514, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2574, "sent_len_1": 66.6446, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.5075, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.3819, "doc_norm": 1.4557, "encoder_q-embeddings": 2823.3066, "encoder_q-layer.0": 1933.1664, "encoder_q-layer.1": 2218.8967, "encoder_q-layer.10": 2674.9976, "encoder_q-layer.11": 6659.7061, "encoder_q-layer.2": 2394.9587, "encoder_q-layer.3": 2424.334, "encoder_q-layer.4": 2721.8171, "encoder_q-layer.5": 2553.0862, "encoder_q-layer.6": 2606.2974, "encoder_q-layer.7": 2540.4495, "encoder_q-layer.8": 2884.2686, "encoder_q-layer.9": 2520.0029, "epoch": 0.73, "inbatch_neg_score": 0.2786, "inbatch_pos_score": 0.9121, "learning_rate": 1.3944444444444446e-05, "loss": 3.3819, "norm_diff": 0.0921, "norm_loss": 0.0, "num_token_doc": 66.6904, "num_token_overlap": 14.6245, "num_token_query": 37.3754, "num_token_union": 65.3645, "num_word_context": 202.1658, "num_word_doc": 49.7878, "num_word_query": 27.974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4626.3286, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2783, "query_norm": 1.3636, "queue_k_norm": 1.4504, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3754, "sent_len_1": 66.6904, "sent_len_max_0": 127.9912, "sent_len_max_1": 186.4062, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3816, "doc_norm": 1.4531, "encoder_q-embeddings": 3125.4771, "encoder_q-layer.0": 2159.5132, "encoder_q-layer.1": 2264.2246, "encoder_q-layer.10": 2642.7273, "encoder_q-layer.11": 5973.73, "encoder_q-layer.2": 2577.9409, "encoder_q-layer.3": 2451.7214, "encoder_q-layer.4": 2378.5762, "encoder_q-layer.5": 2263.1804, "encoder_q-layer.6": 2201.6321, "encoder_q-layer.7": 2271.8574, "encoder_q-layer.8": 2557.8242, "encoder_q-layer.9": 2247.6821, "epoch": 0.73, "inbatch_neg_score": 0.2799, "inbatch_pos_score": 0.9492, "learning_rate": 1.388888888888889e-05, "loss": 3.3816, "norm_diff": 0.0886, "norm_loss": 0.0, "num_token_doc": 66.8261, "num_token_overlap": 14.6073, "num_token_query": 37.3193, "num_token_union": 65.424, "num_word_context": 202.3463, "num_word_doc": 49.9036, "num_word_query": 27.9529, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4465.0509, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2812, "query_norm": 1.3645, "queue_k_norm": 1.4512, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3193, "sent_len_1": 66.8261, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.9062, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.3946, "doc_norm": 1.452, "encoder_q-embeddings": 4097.7002, "encoder_q-layer.0": 2852.5093, "encoder_q-layer.1": 3124.3518, "encoder_q-layer.10": 2502.3813, "encoder_q-layer.11": 6300.1025, "encoder_q-layer.2": 3448.9749, "encoder_q-layer.3": 3444.7588, "encoder_q-layer.4": 3401.365, "encoder_q-layer.5": 3697.8398, "encoder_q-layer.6": 3837.9956, "encoder_q-layer.7": 3702.2966, "encoder_q-layer.8": 3283.3779, "encoder_q-layer.9": 2413.4004, "epoch": 0.73, "inbatch_neg_score": 0.2859, "inbatch_pos_score": 0.9272, "learning_rate": 1.3833333333333334e-05, "loss": 3.3946, "norm_diff": 0.1019, "norm_loss": 0.0, "num_token_doc": 66.66, "num_token_overlap": 14.6827, "num_token_query": 37.6015, "num_token_union": 65.3702, "num_word_context": 202.0303, "num_word_doc": 49.7516, "num_word_query": 28.1782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5573.5849, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2856, "query_norm": 1.3501, "queue_k_norm": 1.4508, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.6015, "sent_len_1": 66.66, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5962, "stdk": 0.0487, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3871, "doc_norm": 1.4479, "encoder_q-embeddings": 2866.7349, "encoder_q-layer.0": 1947.9824, "encoder_q-layer.1": 2210.0422, "encoder_q-layer.10": 2265.7292, "encoder_q-layer.11": 5839.0396, "encoder_q-layer.2": 2530.0906, "encoder_q-layer.3": 2604.4253, "encoder_q-layer.4": 2539.9673, "encoder_q-layer.5": 2600.9104, "encoder_q-layer.6": 2490.0547, "encoder_q-layer.7": 2460.7805, "encoder_q-layer.8": 2523.2656, "encoder_q-layer.9": 2201.1562, "epoch": 0.73, "inbatch_neg_score": 0.2887, "inbatch_pos_score": 0.9443, "learning_rate": 1.3777777777777778e-05, "loss": 3.3871, "norm_diff": 0.0896, "norm_loss": 0.0, "num_token_doc": 66.8213, "num_token_overlap": 14.5902, "num_token_query": 37.4189, "num_token_union": 65.4366, "num_word_context": 202.5882, "num_word_doc": 49.8446, "num_word_query": 28.0153, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4315.0, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2886, "query_norm": 1.3583, "queue_k_norm": 1.4495, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4189, "sent_len_1": 66.8213, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6362, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.376, "doc_norm": 1.4462, "encoder_q-embeddings": 2244.9746, "encoder_q-layer.0": 1474.4572, "encoder_q-layer.1": 1487.4384, "encoder_q-layer.10": 2473.9092, "encoder_q-layer.11": 6163.5557, "encoder_q-layer.2": 1659.6476, "encoder_q-layer.3": 1779.3876, "encoder_q-layer.4": 1825.2678, "encoder_q-layer.5": 1805.6088, "encoder_q-layer.6": 1912.5798, "encoder_q-layer.7": 2045.7761, "encoder_q-layer.8": 2481.7896, "encoder_q-layer.9": 2311.3469, "epoch": 0.74, "inbatch_neg_score": 0.2848, "inbatch_pos_score": 0.9292, "learning_rate": 1.3722222222222222e-05, "loss": 3.376, "norm_diff": 0.0861, "norm_loss": 0.0, "num_token_doc": 66.8467, "num_token_overlap": 14.5964, "num_token_query": 37.2815, "num_token_union": 65.308, "num_word_context": 202.098, "num_word_doc": 49.8229, "num_word_query": 27.9034, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3995.5704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.3601, "queue_k_norm": 1.4495, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2815, "sent_len_1": 66.8467, "sent_len_max_0": 128.0, "sent_len_max_1": 193.31, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3912, "doc_norm": 1.4524, "encoder_q-embeddings": 2241.6875, "encoder_q-layer.0": 1556.7938, "encoder_q-layer.1": 1692.9651, "encoder_q-layer.10": 2200.6038, "encoder_q-layer.11": 6034.0308, "encoder_q-layer.2": 1934.4678, "encoder_q-layer.3": 2014.8459, "encoder_q-layer.4": 2148.1399, "encoder_q-layer.5": 2125.3518, "encoder_q-layer.6": 2119.9487, "encoder_q-layer.7": 2332.5547, "encoder_q-layer.8": 2548.126, "encoder_q-layer.9": 2231.72, "epoch": 0.74, "inbatch_neg_score": 0.285, "inbatch_pos_score": 0.9365, "learning_rate": 1.3666666666666666e-05, "loss": 3.3912, "norm_diff": 0.0913, "norm_loss": 0.0, "num_token_doc": 66.65, "num_token_overlap": 14.5838, "num_token_query": 37.3951, "num_token_union": 65.3385, "num_word_context": 202.0852, "num_word_doc": 49.7288, "num_word_query": 28.0246, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4034.2945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3611, "queue_k_norm": 1.4504, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3951, "sent_len_1": 66.65, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2625, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3886, "doc_norm": 1.4555, "encoder_q-embeddings": 2111.6777, "encoder_q-layer.0": 1444.6194, "encoder_q-layer.1": 1467.2803, "encoder_q-layer.10": 2238.1465, "encoder_q-layer.11": 5897.8271, "encoder_q-layer.2": 1647.8124, "encoder_q-layer.3": 1691.4189, "encoder_q-layer.4": 1797.5696, "encoder_q-layer.5": 1826.4249, "encoder_q-layer.6": 1965.1809, "encoder_q-layer.7": 2191.9307, "encoder_q-layer.8": 2486.3176, "encoder_q-layer.9": 2161.5923, "epoch": 0.74, "inbatch_neg_score": 0.2867, "inbatch_pos_score": 0.9277, "learning_rate": 1.3611111111111111e-05, "loss": 3.3886, "norm_diff": 0.1003, "norm_loss": 0.0, "num_token_doc": 66.7348, "num_token_overlap": 14.5704, "num_token_query": 37.3938, "num_token_union": 65.4152, "num_word_context": 202.1898, "num_word_doc": 49.8205, "num_word_query": 28.015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3810.4543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2874, "query_norm": 1.3553, "queue_k_norm": 1.4513, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3938, "sent_len_1": 66.7348, "sent_len_max_0": 128.0, "sent_len_max_1": 189.77, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3866, "doc_norm": 1.4474, "encoder_q-embeddings": 3070.4526, "encoder_q-layer.0": 2041.434, "encoder_q-layer.1": 2249.5984, "encoder_q-layer.10": 2357.2314, "encoder_q-layer.11": 6371.8716, "encoder_q-layer.2": 2628.6421, "encoder_q-layer.3": 2692.6213, "encoder_q-layer.4": 3015.6226, "encoder_q-layer.5": 2879.2861, "encoder_q-layer.6": 2881.5391, "encoder_q-layer.7": 2930.1611, "encoder_q-layer.8": 2906.5991, "encoder_q-layer.9": 2386.4016, "epoch": 0.74, "inbatch_neg_score": 0.2928, "inbatch_pos_score": 0.9321, "learning_rate": 1.3555555555555557e-05, "loss": 3.3866, "norm_diff": 0.0911, "norm_loss": 0.0, "num_token_doc": 66.8106, "num_token_overlap": 14.5355, "num_token_query": 37.1825, "num_token_union": 65.2917, "num_word_context": 202.196, "num_word_doc": 49.7996, "num_word_query": 27.8173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4774.6993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2922, "query_norm": 1.3563, "queue_k_norm": 1.4506, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1825, "sent_len_1": 66.8106, "sent_len_max_0": 128.0, "sent_len_max_1": 192.505, "stdk": 0.0485, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3634, "doc_norm": 1.4516, "encoder_q-embeddings": 3311.0955, "encoder_q-layer.0": 2187.2551, "encoder_q-layer.1": 2509.0652, "encoder_q-layer.10": 2459.8398, "encoder_q-layer.11": 6071.7446, "encoder_q-layer.2": 2927.4854, "encoder_q-layer.3": 3186.3718, "encoder_q-layer.4": 3181.8079, "encoder_q-layer.5": 2668.6155, "encoder_q-layer.6": 2408.8484, "encoder_q-layer.7": 2247.6697, "encoder_q-layer.8": 2539.6025, "encoder_q-layer.9": 2277.9583, "epoch": 0.74, "inbatch_neg_score": 0.2938, "inbatch_pos_score": 0.9536, "learning_rate": 1.3500000000000001e-05, "loss": 3.3634, "norm_diff": 0.0813, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 14.5668, "num_token_query": 37.3189, "num_token_union": 65.3097, "num_word_context": 202.4112, "num_word_doc": 49.7491, "num_word_query": 27.9379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4751.1809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.3703, "queue_k_norm": 1.4515, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3189, "sent_len_1": 66.6695, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1863, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3618, "doc_norm": 1.4481, "encoder_q-embeddings": 2613.0308, "encoder_q-layer.0": 1808.1646, "encoder_q-layer.1": 1968.8058, "encoder_q-layer.10": 2259.8274, "encoder_q-layer.11": 5800.1665, "encoder_q-layer.2": 2189.8691, "encoder_q-layer.3": 2358.8291, "encoder_q-layer.4": 2377.531, "encoder_q-layer.5": 2373.7437, "encoder_q-layer.6": 2582.562, "encoder_q-layer.7": 2708.1567, "encoder_q-layer.8": 2745.0522, "encoder_q-layer.9": 2192.8428, "epoch": 0.74, "inbatch_neg_score": 0.2943, "inbatch_pos_score": 0.9565, "learning_rate": 1.3444444444444445e-05, "loss": 3.3618, "norm_diff": 0.0858, "norm_loss": 0.0, "num_token_doc": 66.7879, "num_token_overlap": 14.6176, "num_token_query": 37.3427, "num_token_union": 65.414, "num_word_context": 202.5708, "num_word_doc": 49.8393, "num_word_query": 27.9807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4278.4215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.3623, "queue_k_norm": 1.454, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3427, "sent_len_1": 66.7879, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.2275, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3793, "doc_norm": 1.4562, "encoder_q-embeddings": 2476.5037, "encoder_q-layer.0": 1753.7063, "encoder_q-layer.1": 1909.7927, "encoder_q-layer.10": 2360.6724, "encoder_q-layer.11": 5950.0044, "encoder_q-layer.2": 2273.1768, "encoder_q-layer.3": 2351.4978, "encoder_q-layer.4": 2547.5063, "encoder_q-layer.5": 2455.9001, "encoder_q-layer.6": 2484.7969, "encoder_q-layer.7": 2495.075, "encoder_q-layer.8": 2678.3757, "encoder_q-layer.9": 2348.6562, "epoch": 0.74, "inbatch_neg_score": 0.2939, "inbatch_pos_score": 0.9326, "learning_rate": 1.338888888888889e-05, "loss": 3.3793, "norm_diff": 0.1076, "norm_loss": 0.0, "num_token_doc": 66.7047, "num_token_overlap": 14.5639, "num_token_query": 37.3419, "num_token_union": 65.3609, "num_word_context": 202.0024, "num_word_doc": 49.7843, "num_word_query": 27.9587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4256.8071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3486, "queue_k_norm": 1.4529, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3419, "sent_len_1": 66.7047, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.8487, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3935, "doc_norm": 1.4575, "encoder_q-embeddings": 1979.2424, "encoder_q-layer.0": 1335.5297, "encoder_q-layer.1": 1362.4722, "encoder_q-layer.10": 2440.8503, "encoder_q-layer.11": 6413.229, "encoder_q-layer.2": 1566.229, "encoder_q-layer.3": 1651.5734, "encoder_q-layer.4": 1691.4933, "encoder_q-layer.5": 1688.1874, "encoder_q-layer.6": 1923.045, "encoder_q-layer.7": 2265.6182, "encoder_q-layer.8": 2527.5757, "encoder_q-layer.9": 2263.2378, "epoch": 0.74, "inbatch_neg_score": 0.2936, "inbatch_pos_score": 0.9512, "learning_rate": 1.3333333333333333e-05, "loss": 3.3935, "norm_diff": 0.0967, "norm_loss": 0.0, "num_token_doc": 66.7732, "num_token_overlap": 14.5382, "num_token_query": 37.1999, "num_token_union": 65.2844, "num_word_context": 202.3009, "num_word_doc": 49.8042, "num_word_query": 27.8332, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3946.3955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.3609, "queue_k_norm": 1.4535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1999, "sent_len_1": 66.7732, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9375, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.3757, "doc_norm": 1.4535, "encoder_q-embeddings": 1173.6638, "encoder_q-layer.0": 842.5323, "encoder_q-layer.1": 927.5856, "encoder_q-layer.10": 1160.0702, "encoder_q-layer.11": 2982.6694, "encoder_q-layer.2": 1132.9376, "encoder_q-layer.3": 1191.2657, "encoder_q-layer.4": 1382.1146, "encoder_q-layer.5": 1373.6594, "encoder_q-layer.6": 1315.1624, "encoder_q-layer.7": 1273.4446, "encoder_q-layer.8": 1276.757, "encoder_q-layer.9": 1084.6758, "epoch": 0.74, "inbatch_neg_score": 0.2913, "inbatch_pos_score": 0.9585, "learning_rate": 1.3277777777777777e-05, "loss": 3.3757, "norm_diff": 0.0979, "norm_loss": 0.0, "num_token_doc": 66.6886, "num_token_overlap": 14.6048, "num_token_query": 37.2533, "num_token_union": 65.2705, "num_word_context": 202.068, "num_word_doc": 49.7643, "num_word_query": 27.8961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2159.1453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.3556, "queue_k_norm": 1.4538, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2533, "sent_len_1": 66.6886, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9525, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3817, "doc_norm": 1.4582, "encoder_q-embeddings": 1030.6969, "encoder_q-layer.0": 697.241, "encoder_q-layer.1": 748.285, "encoder_q-layer.10": 1219.715, "encoder_q-layer.11": 3131.7581, "encoder_q-layer.2": 866.2554, "encoder_q-layer.3": 881.2358, "encoder_q-layer.4": 924.697, "encoder_q-layer.5": 915.7489, "encoder_q-layer.6": 976.1497, "encoder_q-layer.7": 1019.2354, "encoder_q-layer.8": 1228.2686, "encoder_q-layer.9": 1121.301, "epoch": 0.74, "inbatch_neg_score": 0.2921, "inbatch_pos_score": 0.9575, "learning_rate": 1.3222222222222221e-05, "loss": 3.3817, "norm_diff": 0.1087, "norm_loss": 0.0, "num_token_doc": 66.5033, "num_token_overlap": 14.5851, "num_token_query": 37.2386, "num_token_union": 65.1812, "num_word_context": 201.946, "num_word_doc": 49.6482, "num_word_query": 27.8736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1974.6789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.3495, "queue_k_norm": 1.4535, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2386, "sent_len_1": 66.5033, "sent_len_max_0": 128.0, "sent_len_max_1": 189.125, "stdk": 0.049, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.3717, "doc_norm": 1.4535, "encoder_q-embeddings": 1415.8921, "encoder_q-layer.0": 977.7235, "encoder_q-layer.1": 1029.0061, "encoder_q-layer.10": 1233.2163, "encoder_q-layer.11": 3208.0164, "encoder_q-layer.2": 1256.7684, "encoder_q-layer.3": 1207.2969, "encoder_q-layer.4": 1219.2673, "encoder_q-layer.5": 1192.6893, "encoder_q-layer.6": 1180.8328, "encoder_q-layer.7": 1170.7557, "encoder_q-layer.8": 1330.7573, "encoder_q-layer.9": 1194.4015, "epoch": 0.74, "inbatch_neg_score": 0.2886, "inbatch_pos_score": 0.9214, "learning_rate": 1.3166666666666665e-05, "loss": 3.3717, "norm_diff": 0.107, "norm_loss": 0.0, "num_token_doc": 66.7164, "num_token_overlap": 14.6729, "num_token_query": 37.4991, "num_token_union": 65.371, "num_word_context": 202.187, "num_word_doc": 49.8151, "num_word_query": 28.0963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2214.946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3466, "queue_k_norm": 1.4537, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4991, "sent_len_1": 66.7164, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8988, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3883, "doc_norm": 1.4551, "encoder_q-embeddings": 533.7414, "encoder_q-layer.0": 361.7466, "encoder_q-layer.1": 379.1259, "encoder_q-layer.10": 663.6617, "encoder_q-layer.11": 1592.1387, "encoder_q-layer.2": 444.162, "encoder_q-layer.3": 446.276, "encoder_q-layer.4": 461.5203, "encoder_q-layer.5": 454.8224, "encoder_q-layer.6": 508.7072, "encoder_q-layer.7": 534.9987, "encoder_q-layer.8": 614.5978, "encoder_q-layer.9": 564.8827, "epoch": 0.75, "inbatch_neg_score": 0.2875, "inbatch_pos_score": 0.9258, "learning_rate": 1.3111111111111113e-05, "loss": 3.3883, "norm_diff": 0.113, "norm_loss": 0.0, "num_token_doc": 66.6539, "num_token_overlap": 14.6149, "num_token_query": 37.3876, "num_token_union": 65.285, "num_word_context": 202.3923, "num_word_doc": 49.7201, "num_word_query": 28.0181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 994.5866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.3421, "queue_k_norm": 1.4544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3876, "sent_len_1": 66.6539, "sent_len_max_0": 128.0, "sent_len_max_1": 189.44, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.405, "doc_norm": 1.4541, "encoder_q-embeddings": 254.9066, "encoder_q-layer.0": 175.0207, "encoder_q-layer.1": 185.791, "encoder_q-layer.10": 309.2256, "encoder_q-layer.11": 723.0886, "encoder_q-layer.2": 215.7054, "encoder_q-layer.3": 216.3501, "encoder_q-layer.4": 233.1933, "encoder_q-layer.5": 223.2255, "encoder_q-layer.6": 247.9092, "encoder_q-layer.7": 268.3978, "encoder_q-layer.8": 347.2304, "encoder_q-layer.9": 301.2248, "epoch": 0.75, "inbatch_neg_score": 0.2838, "inbatch_pos_score": 0.9326, "learning_rate": 1.3055555555555557e-05, "loss": 3.405, "norm_diff": 0.1145, "norm_loss": 0.0, "num_token_doc": 66.7052, "num_token_overlap": 14.5367, "num_token_query": 37.3196, "num_token_union": 65.3972, "num_word_context": 202.4363, "num_word_doc": 49.7952, "num_word_query": 27.943, "postclip_grad_norm": 1.0, "preclip_grad_norm": 471.9649, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2837, "query_norm": 1.3397, "queue_k_norm": 1.455, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3196, "sent_len_1": 66.7052, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.1037, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3893, "doc_norm": 1.4515, "encoder_q-embeddings": 246.9185, "encoder_q-layer.0": 166.9556, "encoder_q-layer.1": 171.5645, "encoder_q-layer.10": 294.9109, "encoder_q-layer.11": 759.4515, "encoder_q-layer.2": 200.2204, "encoder_q-layer.3": 202.9402, "encoder_q-layer.4": 211.8742, "encoder_q-layer.5": 213.7752, "encoder_q-layer.6": 238.7122, "encoder_q-layer.7": 268.248, "encoder_q-layer.8": 308.7376, "encoder_q-layer.9": 282.4064, "epoch": 0.75, "inbatch_neg_score": 0.2854, "inbatch_pos_score": 0.939, "learning_rate": 1.3000000000000001e-05, "loss": 3.3893, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.684, "num_token_overlap": 14.5714, "num_token_query": 37.318, "num_token_union": 65.3492, "num_word_context": 202.1869, "num_word_doc": 49.7082, "num_word_query": 27.9415, "postclip_grad_norm": 1.0, "preclip_grad_norm": 465.7392, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.3419, "queue_k_norm": 1.455, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.318, "sent_len_1": 66.684, "sent_len_max_0": 127.99, "sent_len_max_1": 192.43, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3825, "doc_norm": 1.4577, "encoder_q-embeddings": 247.7644, "encoder_q-layer.0": 168.2675, "encoder_q-layer.1": 175.8824, "encoder_q-layer.10": 354.3199, "encoder_q-layer.11": 780.3708, "encoder_q-layer.2": 198.0635, "encoder_q-layer.3": 207.4993, "encoder_q-layer.4": 217.3711, "encoder_q-layer.5": 214.5654, "encoder_q-layer.6": 243.7521, "encoder_q-layer.7": 272.9814, "encoder_q-layer.8": 311.5071, "encoder_q-layer.9": 306.2834, "epoch": 0.75, "inbatch_neg_score": 0.286, "inbatch_pos_score": 0.9482, "learning_rate": 1.2944444444444445e-05, "loss": 3.3825, "norm_diff": 0.1117, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 14.5973, "num_token_query": 37.2854, "num_token_union": 65.3871, "num_word_context": 202.178, "num_word_doc": 49.887, "num_word_query": 27.9302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 486.4529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2854, "query_norm": 1.346, "queue_k_norm": 1.4539, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2854, "sent_len_1": 66.8275, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4, "stdk": 0.0489, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3711, "doc_norm": 1.4538, "encoder_q-embeddings": 267.3356, "encoder_q-layer.0": 176.2382, "encoder_q-layer.1": 181.3466, "encoder_q-layer.10": 308.3794, "encoder_q-layer.11": 756.3852, "encoder_q-layer.2": 204.962, "encoder_q-layer.3": 210.2007, "encoder_q-layer.4": 212.3474, "encoder_q-layer.5": 212.9829, "encoder_q-layer.6": 238.2763, "encoder_q-layer.7": 277.2813, "encoder_q-layer.8": 311.4043, "encoder_q-layer.9": 280.2858, "epoch": 0.75, "inbatch_neg_score": 0.2849, "inbatch_pos_score": 0.9248, "learning_rate": 1.2888888888888889e-05, "loss": 3.3711, "norm_diff": 0.1182, "norm_loss": 0.0, "num_token_doc": 66.6708, "num_token_overlap": 14.5896, "num_token_query": 37.3749, "num_token_union": 65.3161, "num_word_context": 202.12, "num_word_doc": 49.7147, "num_word_query": 27.986, "postclip_grad_norm": 1.0, "preclip_grad_norm": 483.9832, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2849, "query_norm": 1.3356, "queue_k_norm": 1.4544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3749, "sent_len_1": 66.6708, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8738, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3679, "doc_norm": 1.4568, "encoder_q-embeddings": 2056.8855, "encoder_q-layer.0": 1297.6217, "encoder_q-layer.1": 1408.6261, "encoder_q-layer.10": 318.5725, "encoder_q-layer.11": 766.2219, "encoder_q-layer.2": 1502.2499, "encoder_q-layer.3": 1498.2646, "encoder_q-layer.4": 1368.1067, "encoder_q-layer.5": 1102.446, "encoder_q-layer.6": 989.162, "encoder_q-layer.7": 973.7441, "encoder_q-layer.8": 736.4603, "encoder_q-layer.9": 327.4469, "epoch": 0.75, "inbatch_neg_score": 0.2863, "inbatch_pos_score": 0.9189, "learning_rate": 1.2833333333333333e-05, "loss": 3.3679, "norm_diff": 0.1096, "norm_loss": 0.0, "num_token_doc": 66.7903, "num_token_overlap": 14.6191, "num_token_query": 37.478, "num_token_union": 65.4525, "num_word_context": 202.5687, "num_word_doc": 49.8431, "num_word_query": 28.0381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1966.8568, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2864, "query_norm": 1.3472, "queue_k_norm": 1.4532, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.478, "sent_len_1": 66.7903, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0938, "stdk": 0.0489, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.3677, "doc_norm": 1.4507, "encoder_q-embeddings": 280.3224, "encoder_q-layer.0": 190.8285, "encoder_q-layer.1": 216.5852, "encoder_q-layer.10": 351.6681, "encoder_q-layer.11": 820.0727, "encoder_q-layer.2": 252.8143, "encoder_q-layer.3": 260.5827, "encoder_q-layer.4": 276.3606, "encoder_q-layer.5": 283.2617, "encoder_q-layer.6": 299.548, "encoder_q-layer.7": 319.6337, "encoder_q-layer.8": 390.2422, "encoder_q-layer.9": 346.5103, "epoch": 0.75, "inbatch_neg_score": 0.2895, "inbatch_pos_score": 0.9224, "learning_rate": 1.2777777777777777e-05, "loss": 3.3677, "norm_diff": 0.1044, "norm_loss": 0.0, "num_token_doc": 66.7455, "num_token_overlap": 14.5725, "num_token_query": 37.3045, "num_token_union": 65.3758, "num_word_context": 202.3282, "num_word_doc": 49.8453, "num_word_query": 27.9406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 541.852, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3463, "queue_k_norm": 1.4554, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3045, "sent_len_1": 66.7455, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4588, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3768, "doc_norm": 1.456, "encoder_q-embeddings": 324.4661, "encoder_q-layer.0": 230.8992, "encoder_q-layer.1": 253.3969, "encoder_q-layer.10": 340.7699, "encoder_q-layer.11": 806.0444, "encoder_q-layer.2": 285.2981, "encoder_q-layer.3": 300.5117, "encoder_q-layer.4": 311.8146, "encoder_q-layer.5": 270.0613, "encoder_q-layer.6": 291.8254, "encoder_q-layer.7": 295.5273, "encoder_q-layer.8": 351.7315, "encoder_q-layer.9": 299.3056, "epoch": 0.75, "inbatch_neg_score": 0.288, "inbatch_pos_score": 0.9326, "learning_rate": 1.2722222222222221e-05, "loss": 3.3768, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.7191, "num_token_overlap": 14.5288, "num_token_query": 37.1877, "num_token_union": 65.3263, "num_word_context": 202.0523, "num_word_doc": 49.7476, "num_word_query": 27.827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 560.7729, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.3425, "queue_k_norm": 1.4561, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1877, "sent_len_1": 66.7191, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.29, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.3762, "doc_norm": 1.4529, "encoder_q-embeddings": 270.3123, "encoder_q-layer.0": 188.6625, "encoder_q-layer.1": 198.6891, "encoder_q-layer.10": 313.725, "encoder_q-layer.11": 782.3157, "encoder_q-layer.2": 226.7601, "encoder_q-layer.3": 228.3754, "encoder_q-layer.4": 240.0094, "encoder_q-layer.5": 235.2678, "encoder_q-layer.6": 256.6654, "encoder_q-layer.7": 271.3053, "encoder_q-layer.8": 304.8197, "encoder_q-layer.9": 294.7061, "epoch": 0.75, "inbatch_neg_score": 0.2873, "inbatch_pos_score": 0.9497, "learning_rate": 1.2666666666666668e-05, "loss": 3.3762, "norm_diff": 0.0966, "norm_loss": 0.0, "num_token_doc": 66.6741, "num_token_overlap": 14.6009, "num_token_query": 37.4275, "num_token_union": 65.3602, "num_word_context": 202.6308, "num_word_doc": 49.7482, "num_word_query": 28.0411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 497.9462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2871, "query_norm": 1.3563, "queue_k_norm": 1.4539, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4275, "sent_len_1": 66.6741, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7688, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3751, "doc_norm": 1.4594, "encoder_q-embeddings": 373.1894, "encoder_q-layer.0": 281.7714, "encoder_q-layer.1": 289.1725, "encoder_q-layer.10": 277.3863, "encoder_q-layer.11": 781.5024, "encoder_q-layer.2": 354.6373, "encoder_q-layer.3": 307.1597, "encoder_q-layer.4": 298.2076, "encoder_q-layer.5": 271.7614, "encoder_q-layer.6": 279.0063, "encoder_q-layer.7": 284.2588, "encoder_q-layer.8": 322.8256, "encoder_q-layer.9": 288.4527, "epoch": 0.75, "inbatch_neg_score": 0.2902, "inbatch_pos_score": 0.9287, "learning_rate": 1.2611111111111113e-05, "loss": 3.3751, "norm_diff": 0.1162, "norm_loss": 0.0, "num_token_doc": 66.8322, "num_token_overlap": 14.6222, "num_token_query": 37.4561, "num_token_union": 65.4306, "num_word_context": 202.3685, "num_word_doc": 49.8148, "num_word_query": 28.0262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 563.4564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2898, "query_norm": 1.3432, "queue_k_norm": 1.4533, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4561, "sent_len_1": 66.8322, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4875, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3794, "doc_norm": 1.4505, "encoder_q-embeddings": 416.1136, "encoder_q-layer.0": 299.7115, "encoder_q-layer.1": 305.1259, "encoder_q-layer.10": 296.5473, "encoder_q-layer.11": 754.4372, "encoder_q-layer.2": 345.7281, "encoder_q-layer.3": 355.5441, "encoder_q-layer.4": 364.5992, "encoder_q-layer.5": 388.7315, "encoder_q-layer.6": 414.4381, "encoder_q-layer.7": 398.8978, "encoder_q-layer.8": 425.6077, "encoder_q-layer.9": 298.565, "epoch": 0.76, "inbatch_neg_score": 0.289, "inbatch_pos_score": 0.9331, "learning_rate": 1.2555555555555557e-05, "loss": 3.3794, "norm_diff": 0.0986, "norm_loss": 0.0, "num_token_doc": 66.928, "num_token_overlap": 14.6027, "num_token_query": 37.2494, "num_token_union": 65.4171, "num_word_context": 202.4318, "num_word_doc": 49.9481, "num_word_query": 27.8747, "postclip_grad_norm": 1.0, "preclip_grad_norm": 611.3405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2888, "query_norm": 1.3519, "queue_k_norm": 1.4536, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2494, "sent_len_1": 66.928, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2775, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3746, "doc_norm": 1.4537, "encoder_q-embeddings": 369.8807, "encoder_q-layer.0": 263.6111, "encoder_q-layer.1": 269.5165, "encoder_q-layer.10": 294.6661, "encoder_q-layer.11": 789.0578, "encoder_q-layer.2": 309.3291, "encoder_q-layer.3": 310.9657, "encoder_q-layer.4": 311.5734, "encoder_q-layer.5": 283.0509, "encoder_q-layer.6": 280.7882, "encoder_q-layer.7": 289.9488, "encoder_q-layer.8": 327.3867, "encoder_q-layer.9": 280.8882, "epoch": 0.76, "inbatch_neg_score": 0.2908, "inbatch_pos_score": 0.9448, "learning_rate": 1.25e-05, "loss": 3.3746, "norm_diff": 0.1212, "norm_loss": 0.0, "num_token_doc": 66.8377, "num_token_overlap": 14.6255, "num_token_query": 37.4167, "num_token_union": 65.3973, "num_word_context": 202.5112, "num_word_doc": 49.8475, "num_word_query": 28.0231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 560.6672, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3325, "queue_k_norm": 1.4544, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4167, "sent_len_1": 66.8377, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8837, "stdk": 0.0487, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3693, "doc_norm": 1.4492, "encoder_q-embeddings": 359.5298, "encoder_q-layer.0": 249.794, "encoder_q-layer.1": 282.2402, "encoder_q-layer.10": 321.362, "encoder_q-layer.11": 797.7589, "encoder_q-layer.2": 315.7674, "encoder_q-layer.3": 347.243, "encoder_q-layer.4": 349.7744, "encoder_q-layer.5": 326.7461, "encoder_q-layer.6": 336.0763, "encoder_q-layer.7": 342.3755, "encoder_q-layer.8": 333.1642, "encoder_q-layer.9": 304.2197, "epoch": 0.76, "inbatch_neg_score": 0.2887, "inbatch_pos_score": 0.9097, "learning_rate": 1.2444444444444445e-05, "loss": 3.3693, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.7451, "num_token_overlap": 14.5938, "num_token_query": 37.4447, "num_token_union": 65.4541, "num_word_context": 202.0008, "num_word_doc": 49.8088, "num_word_query": 28.053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 591.0546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3466, "queue_k_norm": 1.455, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4447, "sent_len_1": 66.7451, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.4225, "stdk": 0.0485, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.4087, "doc_norm": 1.4508, "encoder_q-embeddings": 959.7482, "encoder_q-layer.0": 696.2827, "encoder_q-layer.1": 757.2327, "encoder_q-layer.10": 290.4405, "encoder_q-layer.11": 778.1494, "encoder_q-layer.2": 853.3008, "encoder_q-layer.3": 1005.2921, "encoder_q-layer.4": 1030.6357, "encoder_q-layer.5": 1041.7374, "encoder_q-layer.6": 927.1053, "encoder_q-layer.7": 830.4673, "encoder_q-layer.8": 595.6125, "encoder_q-layer.9": 316.1276, "epoch": 0.76, "inbatch_neg_score": 0.2916, "inbatch_pos_score": 0.9204, "learning_rate": 1.238888888888889e-05, "loss": 3.4087, "norm_diff": 0.1104, "norm_loss": 0.0, "num_token_doc": 66.8327, "num_token_overlap": 14.4927, "num_token_query": 37.1653, "num_token_union": 65.357, "num_word_context": 202.2104, "num_word_doc": 49.8903, "num_word_query": 27.8094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1235.5197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2915, "query_norm": 1.3404, "queue_k_norm": 1.4544, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1653, "sent_len_1": 66.8327, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.6838, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.3652, "doc_norm": 1.4531, "encoder_q-embeddings": 1608.1925, "encoder_q-layer.0": 1107.097, "encoder_q-layer.1": 1218.2715, "encoder_q-layer.10": 318.8155, "encoder_q-layer.11": 773.0844, "encoder_q-layer.2": 1626.5924, "encoder_q-layer.3": 1673.9642, "encoder_q-layer.4": 1623.7982, "encoder_q-layer.5": 1583.1716, "encoder_q-layer.6": 1273.4673, "encoder_q-layer.7": 1245.6931, "encoder_q-layer.8": 851.436, "encoder_q-layer.9": 340.5345, "epoch": 0.76, "inbatch_neg_score": 0.2933, "inbatch_pos_score": 0.9326, "learning_rate": 1.2333333333333334e-05, "loss": 3.3652, "norm_diff": 0.1058, "norm_loss": 0.0, "num_token_doc": 66.7279, "num_token_overlap": 14.5298, "num_token_query": 37.3097, "num_token_union": 65.4007, "num_word_context": 202.2406, "num_word_doc": 49.8433, "num_word_query": 27.9162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1918.1107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.3474, "queue_k_norm": 1.455, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3097, "sent_len_1": 66.7279, "sent_len_max_0": 127.995, "sent_len_max_1": 188.9038, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3956, "doc_norm": 1.4566, "encoder_q-embeddings": 378.574, "encoder_q-layer.0": 269.3743, "encoder_q-layer.1": 289.3139, "encoder_q-layer.10": 307.5652, "encoder_q-layer.11": 773.9255, "encoder_q-layer.2": 319.3855, "encoder_q-layer.3": 325.0171, "encoder_q-layer.4": 326.7331, "encoder_q-layer.5": 313.57, "encoder_q-layer.6": 310.5452, "encoder_q-layer.7": 334.1861, "encoder_q-layer.8": 351.2809, "encoder_q-layer.9": 296.2916, "epoch": 0.76, "inbatch_neg_score": 0.2952, "inbatch_pos_score": 0.9355, "learning_rate": 1.2277777777777778e-05, "loss": 3.3956, "norm_diff": 0.1144, "norm_loss": 0.0, "num_token_doc": 66.5475, "num_token_overlap": 14.5729, "num_token_query": 37.2184, "num_token_union": 65.182, "num_word_context": 202.0723, "num_word_doc": 49.6411, "num_word_query": 27.8628, "postclip_grad_norm": 1.0, "preclip_grad_norm": 581.071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.3422, "queue_k_norm": 1.4533, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2184, "sent_len_1": 66.5475, "sent_len_max_0": 127.985, "sent_len_max_1": 190.3212, "stdk": 0.0488, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3702, "doc_norm": 1.4563, "encoder_q-embeddings": 633.5114, "encoder_q-layer.0": 462.4172, "encoder_q-layer.1": 505.4846, "encoder_q-layer.10": 284.6861, "encoder_q-layer.11": 735.8768, "encoder_q-layer.2": 631.287, "encoder_q-layer.3": 667.4628, "encoder_q-layer.4": 602.1237, "encoder_q-layer.5": 609.2188, "encoder_q-layer.6": 711.3941, "encoder_q-layer.7": 753.3578, "encoder_q-layer.8": 572.3212, "encoder_q-layer.9": 293.7866, "epoch": 0.76, "inbatch_neg_score": 0.2946, "inbatch_pos_score": 0.9497, "learning_rate": 1.2222222222222222e-05, "loss": 3.3702, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.6928, "num_token_overlap": 14.5813, "num_token_query": 37.3896, "num_token_union": 65.3799, "num_word_context": 202.1223, "num_word_doc": 49.8009, "num_word_query": 28.0124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 900.7247, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3466, "queue_k_norm": 1.4561, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3896, "sent_len_1": 66.6928, "sent_len_max_0": 128.0, "sent_len_max_1": 188.455, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3734, "doc_norm": 1.4568, "encoder_q-embeddings": 267.2715, "encoder_q-layer.0": 181.4389, "encoder_q-layer.1": 186.4363, "encoder_q-layer.10": 291.8455, "encoder_q-layer.11": 768.7044, "encoder_q-layer.2": 212.4079, "encoder_q-layer.3": 222.5289, "encoder_q-layer.4": 231.716, "encoder_q-layer.5": 239.7015, "encoder_q-layer.6": 263.5027, "encoder_q-layer.7": 271.6749, "encoder_q-layer.8": 309.2917, "encoder_q-layer.9": 283.3991, "epoch": 0.76, "inbatch_neg_score": 0.2926, "inbatch_pos_score": 0.9248, "learning_rate": 1.2166666666666668e-05, "loss": 3.3734, "norm_diff": 0.1168, "norm_loss": 0.0, "num_token_doc": 66.7293, "num_token_overlap": 14.6698, "num_token_query": 37.5328, "num_token_union": 65.3914, "num_word_context": 202.093, "num_word_doc": 49.7703, "num_word_query": 28.0939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 494.5834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.34, "queue_k_norm": 1.454, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5328, "sent_len_1": 66.7293, "sent_len_max_0": 127.99, "sent_len_max_1": 190.1525, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.3811, "doc_norm": 1.452, "encoder_q-embeddings": 341.9843, "encoder_q-layer.0": 238.1909, "encoder_q-layer.1": 259.7286, "encoder_q-layer.10": 278.5411, "encoder_q-layer.11": 760.7767, "encoder_q-layer.2": 298.5131, "encoder_q-layer.3": 290.0179, "encoder_q-layer.4": 312.2047, "encoder_q-layer.5": 287.4516, "encoder_q-layer.6": 300.2085, "encoder_q-layer.7": 306.8383, "encoder_q-layer.8": 321.1534, "encoder_q-layer.9": 287.7145, "epoch": 0.76, "inbatch_neg_score": 0.2972, "inbatch_pos_score": 0.9248, "learning_rate": 1.2111111111111112e-05, "loss": 3.3811, "norm_diff": 0.1079, "norm_loss": 0.0, "num_token_doc": 66.9216, "num_token_overlap": 14.5911, "num_token_query": 37.2626, "num_token_union": 65.3908, "num_word_context": 202.1706, "num_word_doc": 49.9096, "num_word_query": 27.9018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 543.4315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.3441, "queue_k_norm": 1.4578, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2626, "sent_len_1": 66.9216, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1175, "stdk": 0.0486, "stdq": 0.0439, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.38, "doc_norm": 1.4565, "encoder_q-embeddings": 251.759, "encoder_q-layer.0": 174.379, "encoder_q-layer.1": 174.7333, "encoder_q-layer.10": 306.2045, "encoder_q-layer.11": 803.8934, "encoder_q-layer.2": 197.057, "encoder_q-layer.3": 198.6, "encoder_q-layer.4": 211.9921, "encoder_q-layer.5": 206.2017, "encoder_q-layer.6": 229.9646, "encoder_q-layer.7": 254.6047, "encoder_q-layer.8": 334.0152, "encoder_q-layer.9": 295.3045, "epoch": 0.76, "inbatch_neg_score": 0.2955, "inbatch_pos_score": 0.9502, "learning_rate": 1.2055555555555556e-05, "loss": 3.38, "norm_diff": 0.099, "norm_loss": 0.0, "num_token_doc": 66.7575, "num_token_overlap": 14.5303, "num_token_query": 37.3006, "num_token_union": 65.4256, "num_word_context": 202.3825, "num_word_doc": 49.844, "num_word_query": 27.9575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 498.0824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3575, "queue_k_norm": 1.4562, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3006, "sent_len_1": 66.7575, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0037, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3755, "doc_norm": 1.462, "encoder_q-embeddings": 403.652, "encoder_q-layer.0": 276.666, "encoder_q-layer.1": 289.7037, "encoder_q-layer.10": 299.9347, "encoder_q-layer.11": 795.6745, "encoder_q-layer.2": 338.1667, "encoder_q-layer.3": 351.821, "encoder_q-layer.4": 374.6864, "encoder_q-layer.5": 356.0405, "encoder_q-layer.6": 359.6847, "encoder_q-layer.7": 341.4081, "encoder_q-layer.8": 352.1224, "encoder_q-layer.9": 295.1085, "epoch": 0.77, "inbatch_neg_score": 0.3004, "inbatch_pos_score": 0.9541, "learning_rate": 1.2e-05, "loss": 3.3755, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.6391, "num_token_overlap": 14.5447, "num_token_query": 37.1732, "num_token_union": 65.1879, "num_word_context": 202.1622, "num_word_doc": 49.672, "num_word_query": 27.8205, "postclip_grad_norm": 1.0, "preclip_grad_norm": 610.2851, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3, "query_norm": 1.348, "queue_k_norm": 1.4559, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1732, "sent_len_1": 66.6391, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.5525, "stdk": 0.049, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3788, "doc_norm": 1.456, "encoder_q-embeddings": 684.4714, "encoder_q-layer.0": 488.2869, "encoder_q-layer.1": 520.4778, "encoder_q-layer.10": 593.582, "encoder_q-layer.11": 1525.517, "encoder_q-layer.2": 600.7651, "encoder_q-layer.3": 594.8386, "encoder_q-layer.4": 644.2265, "encoder_q-layer.5": 623.601, "encoder_q-layer.6": 690.0472, "encoder_q-layer.7": 722.2447, "encoder_q-layer.8": 700.0914, "encoder_q-layer.9": 541.7147, "epoch": 0.77, "inbatch_neg_score": 0.2978, "inbatch_pos_score": 0.9355, "learning_rate": 1.1944444444444446e-05, "loss": 3.3788, "norm_diff": 0.1238, "norm_loss": 0.0, "num_token_doc": 66.8168, "num_token_overlap": 14.5736, "num_token_query": 37.2639, "num_token_union": 65.3675, "num_word_context": 202.5218, "num_word_doc": 49.8522, "num_word_query": 27.8882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1122.1783, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.3321, "queue_k_norm": 1.4552, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2639, "sent_len_1": 66.8168, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.1975, "stdk": 0.0487, "stdq": 0.0435, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3719, "doc_norm": 1.4617, "encoder_q-embeddings": 614.8851, "encoder_q-layer.0": 444.916, "encoder_q-layer.1": 460.8904, "encoder_q-layer.10": 585.3331, "encoder_q-layer.11": 1523.8448, "encoder_q-layer.2": 512.4447, "encoder_q-layer.3": 530.3328, "encoder_q-layer.4": 577.0908, "encoder_q-layer.5": 589.6174, "encoder_q-layer.6": 593.434, "encoder_q-layer.7": 576.4311, "encoder_q-layer.8": 619.7032, "encoder_q-layer.9": 558.5858, "epoch": 0.77, "inbatch_neg_score": 0.3011, "inbatch_pos_score": 0.9375, "learning_rate": 1.188888888888889e-05, "loss": 3.3719, "norm_diff": 0.1127, "norm_loss": 0.0, "num_token_doc": 66.824, "num_token_overlap": 14.5892, "num_token_query": 37.3763, "num_token_union": 65.4167, "num_word_context": 202.6666, "num_word_doc": 49.8762, "num_word_query": 27.9979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1054.9478, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3013, "query_norm": 1.349, "queue_k_norm": 1.4562, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3763, "sent_len_1": 66.824, "sent_len_max_0": 127.995, "sent_len_max_1": 191.88, "stdk": 0.049, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3606, "doc_norm": 1.4564, "encoder_q-embeddings": 811.8026, "encoder_q-layer.0": 607.5009, "encoder_q-layer.1": 639.0592, "encoder_q-layer.10": 575.1732, "encoder_q-layer.11": 1514.3696, "encoder_q-layer.2": 816.0845, "encoder_q-layer.3": 884.3209, "encoder_q-layer.4": 888.6693, "encoder_q-layer.5": 842.5089, "encoder_q-layer.6": 818.8542, "encoder_q-layer.7": 794.4955, "encoder_q-layer.8": 808.045, "encoder_q-layer.9": 567.1168, "epoch": 0.77, "inbatch_neg_score": 0.2987, "inbatch_pos_score": 0.9507, "learning_rate": 1.1833333333333334e-05, "loss": 3.3606, "norm_diff": 0.1062, "norm_loss": 0.0, "num_token_doc": 66.4093, "num_token_overlap": 14.6239, "num_token_query": 37.6022, "num_token_union": 65.2797, "num_word_context": 202.2248, "num_word_doc": 49.5542, "num_word_query": 28.1445, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1305.114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2996, "query_norm": 1.3502, "queue_k_norm": 1.4558, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.6022, "sent_len_1": 66.4093, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.0362, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3781, "doc_norm": 1.4554, "encoder_q-embeddings": 529.7408, "encoder_q-layer.0": 356.2011, "encoder_q-layer.1": 367.6753, "encoder_q-layer.10": 531.6194, "encoder_q-layer.11": 1469.6025, "encoder_q-layer.2": 410.7268, "encoder_q-layer.3": 411.7552, "encoder_q-layer.4": 430.2268, "encoder_q-layer.5": 440.9181, "encoder_q-layer.6": 470.5914, "encoder_q-layer.7": 492.3418, "encoder_q-layer.8": 592.0877, "encoder_q-layer.9": 551.2495, "epoch": 0.77, "inbatch_neg_score": 0.3022, "inbatch_pos_score": 0.9609, "learning_rate": 1.1777777777777778e-05, "loss": 3.3781, "norm_diff": 0.086, "norm_loss": 0.0, "num_token_doc": 67.072, "num_token_overlap": 14.6532, "num_token_query": 37.5121, "num_token_union": 65.6167, "num_word_context": 202.4763, "num_word_doc": 50.1175, "num_word_query": 28.0771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 962.7892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3015, "query_norm": 1.3694, "queue_k_norm": 1.4574, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5121, "sent_len_1": 67.072, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5387, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3689, "doc_norm": 1.46, "encoder_q-embeddings": 1712.329, "encoder_q-layer.0": 1157.046, "encoder_q-layer.1": 1341.5963, "encoder_q-layer.10": 581.5428, "encoder_q-layer.11": 1592.5363, "encoder_q-layer.2": 1592.6729, "encoder_q-layer.3": 1528.5464, "encoder_q-layer.4": 1162.0975, "encoder_q-layer.5": 1052.0387, "encoder_q-layer.6": 892.965, "encoder_q-layer.7": 819.5363, "encoder_q-layer.8": 732.2189, "encoder_q-layer.9": 555.3473, "epoch": 0.77, "inbatch_neg_score": 0.3033, "inbatch_pos_score": 0.9536, "learning_rate": 1.1722222222222224e-05, "loss": 3.3689, "norm_diff": 0.1077, "norm_loss": 0.0, "num_token_doc": 66.7032, "num_token_overlap": 14.5768, "num_token_query": 37.3247, "num_token_union": 65.3565, "num_word_context": 202.303, "num_word_doc": 49.7894, "num_word_query": 27.9116, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1883.2194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3037, "query_norm": 1.3523, "queue_k_norm": 1.4567, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3247, "sent_len_1": 66.7032, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4062, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3598, "doc_norm": 1.4614, "encoder_q-embeddings": 1191.2726, "encoder_q-layer.0": 858.5801, "encoder_q-layer.1": 1018.6976, "encoder_q-layer.10": 611.8918, "encoder_q-layer.11": 1605.3311, "encoder_q-layer.2": 1297.7496, "encoder_q-layer.3": 1209.6156, "encoder_q-layer.4": 1217.106, "encoder_q-layer.5": 1193.4001, "encoder_q-layer.6": 1154.9968, "encoder_q-layer.7": 1024.7581, "encoder_q-layer.8": 988.3324, "encoder_q-layer.9": 619.7065, "epoch": 0.77, "inbatch_neg_score": 0.3066, "inbatch_pos_score": 0.9668, "learning_rate": 1.1666666666666668e-05, "loss": 3.3598, "norm_diff": 0.0978, "norm_loss": 0.0, "num_token_doc": 66.8344, "num_token_overlap": 14.6417, "num_token_query": 37.4216, "num_token_union": 65.4033, "num_word_context": 202.4237, "num_word_doc": 49.8464, "num_word_query": 28.0482, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1672.1038, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3054, "query_norm": 1.3635, "queue_k_norm": 1.4581, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4216, "sent_len_1": 66.8344, "sent_len_max_0": 128.0, "sent_len_max_1": 191.775, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3655, "doc_norm": 1.4635, "encoder_q-embeddings": 743.364, "encoder_q-layer.0": 495.8962, "encoder_q-layer.1": 558.2789, "encoder_q-layer.10": 616.541, "encoder_q-layer.11": 1463.58, "encoder_q-layer.2": 633.6041, "encoder_q-layer.3": 659.3591, "encoder_q-layer.4": 752.0751, "encoder_q-layer.5": 813.6538, "encoder_q-layer.6": 722.6584, "encoder_q-layer.7": 649.4999, "encoder_q-layer.8": 666.9401, "encoder_q-layer.9": 571.7411, "epoch": 0.77, "inbatch_neg_score": 0.3097, "inbatch_pos_score": 0.9658, "learning_rate": 1.1611111111111112e-05, "loss": 3.3655, "norm_diff": 0.1104, "norm_loss": 0.0, "num_token_doc": 66.691, "num_token_overlap": 14.5183, "num_token_query": 37.1994, "num_token_union": 65.2739, "num_word_context": 202.1196, "num_word_doc": 49.7858, "num_word_query": 27.8649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1156.3872, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3098, "query_norm": 1.3531, "queue_k_norm": 1.4575, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1994, "sent_len_1": 66.691, "sent_len_max_0": 127.9975, "sent_len_max_1": 192.165, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3674, "doc_norm": 1.4596, "encoder_q-embeddings": 574.1997, "encoder_q-layer.0": 385.9961, "encoder_q-layer.1": 411.8943, "encoder_q-layer.10": 579.7614, "encoder_q-layer.11": 1505.9993, "encoder_q-layer.2": 483.206, "encoder_q-layer.3": 490.7122, "encoder_q-layer.4": 533.1637, "encoder_q-layer.5": 511.4565, "encoder_q-layer.6": 522.6659, "encoder_q-layer.7": 610.8842, "encoder_q-layer.8": 652.1972, "encoder_q-layer.9": 568.279, "epoch": 0.77, "inbatch_neg_score": 0.307, "inbatch_pos_score": 0.9658, "learning_rate": 1.1555555555555556e-05, "loss": 3.3674, "norm_diff": 0.1029, "norm_loss": 0.0, "num_token_doc": 66.706, "num_token_overlap": 14.6051, "num_token_query": 37.189, "num_token_union": 65.2231, "num_word_context": 202.2973, "num_word_doc": 49.7728, "num_word_query": 27.8353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1029.8098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3071, "query_norm": 1.3567, "queue_k_norm": 1.4572, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.189, "sent_len_1": 66.706, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.7038, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3763, "doc_norm": 1.4628, "encoder_q-embeddings": 506.6086, "encoder_q-layer.0": 339.7858, "encoder_q-layer.1": 346.399, "encoder_q-layer.10": 558.3723, "encoder_q-layer.11": 1482.3784, "encoder_q-layer.2": 394.3629, "encoder_q-layer.3": 401.8965, "encoder_q-layer.4": 424.1837, "encoder_q-layer.5": 408.5897, "encoder_q-layer.6": 458.0936, "encoder_q-layer.7": 498.8089, "encoder_q-layer.8": 601.4493, "encoder_q-layer.9": 553.3658, "epoch": 0.77, "inbatch_neg_score": 0.311, "inbatch_pos_score": 0.9727, "learning_rate": 1.1500000000000002e-05, "loss": 3.3763, "norm_diff": 0.103, "norm_loss": 0.0, "num_token_doc": 66.6889, "num_token_overlap": 14.6022, "num_token_query": 37.2456, "num_token_union": 65.2892, "num_word_context": 202.1529, "num_word_doc": 49.7868, "num_word_query": 27.8808, "postclip_grad_norm": 1.0, "preclip_grad_norm": 957.458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3096, "query_norm": 1.3598, "queue_k_norm": 1.4576, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2456, "sent_len_1": 66.6889, "sent_len_max_0": 128.0, "sent_len_max_1": 188.11, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3753, "doc_norm": 1.4572, "encoder_q-embeddings": 750.2412, "encoder_q-layer.0": 551.8286, "encoder_q-layer.1": 626.3694, "encoder_q-layer.10": 643.8286, "encoder_q-layer.11": 1546.8888, "encoder_q-layer.2": 745.7239, "encoder_q-layer.3": 772.416, "encoder_q-layer.4": 851.8434, "encoder_q-layer.5": 886.0259, "encoder_q-layer.6": 894.7303, "encoder_q-layer.7": 1001.2911, "encoder_q-layer.8": 904.1698, "encoder_q-layer.9": 594.4056, "epoch": 0.78, "inbatch_neg_score": 0.3149, "inbatch_pos_score": 0.9697, "learning_rate": 1.1444444444444446e-05, "loss": 3.3753, "norm_diff": 0.0847, "norm_loss": 0.0, "num_token_doc": 66.5179, "num_token_overlap": 14.5596, "num_token_query": 37.2563, "num_token_union": 65.191, "num_word_context": 202.0677, "num_word_doc": 49.642, "num_word_query": 27.8875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1327.336, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3145, "query_norm": 1.3724, "queue_k_norm": 1.4576, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2563, "sent_len_1": 66.5179, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.0062, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3793, "doc_norm": 1.4669, "encoder_q-embeddings": 601.2607, "encoder_q-layer.0": 409.7065, "encoder_q-layer.1": 448.6156, "encoder_q-layer.10": 639.5937, "encoder_q-layer.11": 1585.6232, "encoder_q-layer.2": 525.3794, "encoder_q-layer.3": 551.5591, "encoder_q-layer.4": 568.7578, "encoder_q-layer.5": 544.5297, "encoder_q-layer.6": 576.8109, "encoder_q-layer.7": 600.2394, "encoder_q-layer.8": 684.6179, "encoder_q-layer.9": 581.4427, "epoch": 0.78, "inbatch_neg_score": 0.3145, "inbatch_pos_score": 0.9844, "learning_rate": 1.138888888888889e-05, "loss": 3.3793, "norm_diff": 0.0903, "norm_loss": 0.0, "num_token_doc": 67.1017, "num_token_overlap": 14.6011, "num_token_query": 37.2338, "num_token_union": 65.4904, "num_word_context": 202.4935, "num_word_doc": 50.0838, "num_word_query": 27.8895, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1082.0294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3152, "query_norm": 1.3765, "queue_k_norm": 1.4599, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2338, "sent_len_1": 67.1017, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5538, "stdk": 0.0491, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.3586, "doc_norm": 1.46, "encoder_q-embeddings": 1079.8644, "encoder_q-layer.0": 745.8008, "encoder_q-layer.1": 799.6602, "encoder_q-layer.10": 584.8895, "encoder_q-layer.11": 1545.9503, "encoder_q-layer.2": 973.4558, "encoder_q-layer.3": 1020.4974, "encoder_q-layer.4": 1167.9672, "encoder_q-layer.5": 1240.2858, "encoder_q-layer.6": 1165.0848, "encoder_q-layer.7": 999.5847, "encoder_q-layer.8": 875.8061, "encoder_q-layer.9": 636.3799, "epoch": 0.78, "inbatch_neg_score": 0.3166, "inbatch_pos_score": 0.9727, "learning_rate": 1.1333333333333334e-05, "loss": 3.3586, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 66.7222, "num_token_overlap": 14.5815, "num_token_query": 37.2929, "num_token_union": 65.3285, "num_word_context": 201.9302, "num_word_doc": 49.7756, "num_word_query": 27.9082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1553.1864, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3171, "query_norm": 1.3603, "queue_k_norm": 1.4599, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2929, "sent_len_1": 66.7222, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.28, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3719, "doc_norm": 1.4669, "encoder_q-embeddings": 702.4623, "encoder_q-layer.0": 469.9654, "encoder_q-layer.1": 525.7184, "encoder_q-layer.10": 627.5787, "encoder_q-layer.11": 1596.8276, "encoder_q-layer.2": 589.0229, "encoder_q-layer.3": 637.2648, "encoder_q-layer.4": 715.7999, "encoder_q-layer.5": 673.2514, "encoder_q-layer.6": 718.1805, "encoder_q-layer.7": 709.8459, "encoder_q-layer.8": 724.5066, "encoder_q-layer.9": 616.2728, "epoch": 0.78, "inbatch_neg_score": 0.3158, "inbatch_pos_score": 0.9497, "learning_rate": 1.127777777777778e-05, "loss": 3.3719, "norm_diff": 0.1254, "norm_loss": 0.0, "num_token_doc": 66.837, "num_token_overlap": 14.5653, "num_token_query": 37.3435, "num_token_union": 65.3748, "num_word_context": 202.3678, "num_word_doc": 49.798, "num_word_query": 27.9379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1147.4381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3154, "query_norm": 1.3415, "queue_k_norm": 1.4617, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3435, "sent_len_1": 66.837, "sent_len_max_0": 127.9912, "sent_len_max_1": 192.0888, "stdk": 0.049, "stdq": 0.0436, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.363, "doc_norm": 1.4655, "encoder_q-embeddings": 497.9421, "encoder_q-layer.0": 323.3286, "encoder_q-layer.1": 329.3922, "encoder_q-layer.10": 548.0823, "encoder_q-layer.11": 1528.6351, "encoder_q-layer.2": 366.5633, "encoder_q-layer.3": 374.5404, "encoder_q-layer.4": 395.0932, "encoder_q-layer.5": 390.5434, "encoder_q-layer.6": 430.9643, "encoder_q-layer.7": 495.3173, "encoder_q-layer.8": 579.2065, "encoder_q-layer.9": 556.054, "epoch": 0.78, "inbatch_neg_score": 0.3131, "inbatch_pos_score": 0.9697, "learning_rate": 1.1222222222222224e-05, "loss": 3.363, "norm_diff": 0.1151, "norm_loss": 0.0, "num_token_doc": 66.9941, "num_token_overlap": 14.6253, "num_token_query": 37.2881, "num_token_union": 65.4283, "num_word_context": 202.4709, "num_word_doc": 49.979, "num_word_query": 27.9131, "postclip_grad_norm": 1.0, "preclip_grad_norm": 961.1769, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3127, "query_norm": 1.3504, "queue_k_norm": 1.4619, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2881, "sent_len_1": 66.9941, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.0625, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3725, "doc_norm": 1.4586, "encoder_q-embeddings": 1418.6449, "encoder_q-layer.0": 1015.2065, "encoder_q-layer.1": 1231.0984, "encoder_q-layer.10": 586.3354, "encoder_q-layer.11": 1492.5878, "encoder_q-layer.2": 1552.7061, "encoder_q-layer.3": 1505.5072, "encoder_q-layer.4": 1308.8197, "encoder_q-layer.5": 1036.6105, "encoder_q-layer.6": 1010.144, "encoder_q-layer.7": 938.6148, "encoder_q-layer.8": 812.1956, "encoder_q-layer.9": 598.5979, "epoch": 0.78, "inbatch_neg_score": 0.3135, "inbatch_pos_score": 0.9829, "learning_rate": 1.1166666666666668e-05, "loss": 3.3725, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.7252, "num_token_overlap": 14.6067, "num_token_query": 37.3624, "num_token_union": 65.3435, "num_word_context": 202.3347, "num_word_doc": 49.8158, "num_word_query": 27.9863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1800.0361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.314, "query_norm": 1.3548, "queue_k_norm": 1.458, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3624, "sent_len_1": 66.7252, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.1375, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3714, "doc_norm": 1.4668, "encoder_q-embeddings": 506.5542, "encoder_q-layer.0": 336.057, "encoder_q-layer.1": 349.6931, "encoder_q-layer.10": 637.9972, "encoder_q-layer.11": 1563.1295, "encoder_q-layer.2": 393.8145, "encoder_q-layer.3": 399.8149, "encoder_q-layer.4": 410.6161, "encoder_q-layer.5": 414.2664, "encoder_q-layer.6": 462.0955, "encoder_q-layer.7": 518.7321, "encoder_q-layer.8": 599.9926, "encoder_q-layer.9": 567.6355, "epoch": 0.78, "inbatch_neg_score": 0.3167, "inbatch_pos_score": 0.9434, "learning_rate": 1.1111111111111112e-05, "loss": 3.3714, "norm_diff": 0.1273, "norm_loss": 0.0, "num_token_doc": 66.4435, "num_token_overlap": 14.583, "num_token_query": 37.2814, "num_token_union": 65.1624, "num_word_context": 201.7155, "num_word_doc": 49.5685, "num_word_query": 27.9077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 976.6531, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3159, "query_norm": 1.3395, "queue_k_norm": 1.4613, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2814, "sent_len_1": 66.4435, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.3162, "stdk": 0.049, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 27.6844, "dev_samples_per_second": 2.312, "dev_steps_per_second": 0.036, "epoch": 0.78, "step": 80000, "test_accuracy": 93.65234375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3804752230644226, "test_doc_norm": 1.440153956413269, "test_inbatch_neg_score": 0.6586048603057861, "test_inbatch_pos_score": 1.6055307388305664, "test_loss": 0.3804752230644226, "test_loss_align": 0.9985188245773315, "test_loss_unif": 3.7992591857910156, "test_loss_unif_q@queue": 3.7992594242095947, "test_norm_diff": 0.013208428397774696, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3067432641983032, "test_query_norm": 1.4501450061798096, "test_queue_k_norm": 1.4612932205200195, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0428745299577713, "test_stdq": 0.04256283491849899, "test_stdqueue_k": 0.048849917948246, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.6844, "dev_samples_per_second": 2.312, "dev_steps_per_second": 0.036, "epoch": 0.78, "eval_beir-arguana_ndcg@10": 0.38391, "eval_beir-arguana_recall@10": 0.64296, "eval_beir-arguana_recall@100": 0.93528, "eval_beir-arguana_recall@20": 0.77312, "eval_beir-avg_ndcg@10": 0.380045, "eval_beir-avg_recall@10": 0.44640599999999997, "eval_beir-avg_recall@100": 0.6316375833333333, "eval_beir-avg_recall@20": 0.5078195833333334, "eval_beir-cqadupstack_ndcg@10": 0.26835, "eval_beir-cqadupstack_recall@10": 0.36192, "eval_beir-cqadupstack_recall@100": 0.5929058333333334, "eval_beir-cqadupstack_recall@20": 0.42825583333333345, "eval_beir-fiqa_ndcg@10": 0.23483, "eval_beir-fiqa_recall@10": 0.28974, "eval_beir-fiqa_recall@100": 0.56989, "eval_beir-fiqa_recall@20": 0.3615, "eval_beir-nfcorpus_ndcg@10": 0.31046, "eval_beir-nfcorpus_recall@10": 0.15179, "eval_beir-nfcorpus_recall@100": 0.29271, "eval_beir-nfcorpus_recall@20": 0.18312, "eval_beir-nq_ndcg@10": 0.26789, "eval_beir-nq_recall@10": 0.44438, "eval_beir-nq_recall@100": 0.78739, "eval_beir-nq_recall@20": 0.56033, "eval_beir-quora_ndcg@10": 0.77354, "eval_beir-quora_recall@10": 0.88335, "eval_beir-quora_recall@100": 0.97711, "eval_beir-quora_recall@20": 0.92696, "eval_beir-scidocs_ndcg@10": 0.14633, "eval_beir-scidocs_recall@10": 0.15752, "eval_beir-scidocs_recall@100": 0.36187, "eval_beir-scidocs_recall@20": 0.21107, "eval_beir-scifact_ndcg@10": 0.65569, "eval_beir-scifact_recall@10": 0.78467, "eval_beir-scifact_recall@100": 0.90489, "eval_beir-scifact_recall@20": 0.83578, "eval_beir-trec-covid_ndcg@10": 0.5784, "eval_beir-trec-covid_recall@10": 0.612, "eval_beir-trec-covid_recall@100": 0.453, "eval_beir-trec-covid_recall@20": 0.586, "eval_beir-webis-touche2020_ndcg@10": 0.18105, "eval_beir-webis-touche2020_recall@10": 0.13573, "eval_beir-webis-touche2020_recall@100": 0.44133, "eval_beir-webis-touche2020_recall@20": 0.21206, "eval_senteval-avg_sts": 0.7450926564479436, "eval_senteval-sickr_spearman": 0.7141311712609013, "eval_senteval-stsb_spearman": 0.776054141634986, "step": 80000, "test_accuracy": 93.65234375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3804752230644226, "test_doc_norm": 1.440153956413269, "test_inbatch_neg_score": 0.6586048603057861, "test_inbatch_pos_score": 1.6055307388305664, "test_loss": 0.3804752230644226, "test_loss_align": 0.9985188245773315, "test_loss_unif": 3.7992591857910156, "test_loss_unif_q@queue": 3.7992594242095947, "test_norm_diff": 0.013208428397774696, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3067432641983032, "test_query_norm": 1.4501450061798096, "test_queue_k_norm": 1.4612932205200195, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0428745299577713, "test_stdq": 0.04256283491849899, "test_stdqueue_k": 0.048849917948246, "test_stdqueue_q": 0.0 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.372, "doc_norm": 1.4634, "encoder_q-embeddings": 1180.8341, "encoder_q-layer.0": 744.8401, "encoder_q-layer.1": 854.0525, "encoder_q-layer.10": 561.7668, "encoder_q-layer.11": 1524.7433, "encoder_q-layer.2": 1013.8132, "encoder_q-layer.3": 1043.6569, "encoder_q-layer.4": 1069.4786, "encoder_q-layer.5": 1073.7399, "encoder_q-layer.6": 1154.5209, "encoder_q-layer.7": 1062.4055, "encoder_q-layer.8": 862.2188, "encoder_q-layer.9": 571.7302, "epoch": 0.78, "inbatch_neg_score": 0.3161, "inbatch_pos_score": 0.9614, "learning_rate": 1.1055555555555556e-05, "loss": 3.372, "norm_diff": 0.1048, "norm_loss": 0.0, "num_token_doc": 66.6906, "num_token_overlap": 14.5422, "num_token_query": 37.2913, "num_token_union": 65.3361, "num_word_context": 202.2125, "num_word_doc": 49.7605, "num_word_query": 27.9261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1542.3456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3169, "query_norm": 1.3586, "queue_k_norm": 1.4629, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2913, "sent_len_1": 66.6906, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5825, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3618, "doc_norm": 1.4598, "encoder_q-embeddings": 1649.0199, "encoder_q-layer.0": 1192.4943, "encoder_q-layer.1": 1289.8052, "encoder_q-layer.10": 673.113, "encoder_q-layer.11": 1581.8499, "encoder_q-layer.2": 1652.3846, "encoder_q-layer.3": 1799.2883, "encoder_q-layer.4": 2194.7073, "encoder_q-layer.5": 2174.4175, "encoder_q-layer.6": 2431.6223, "encoder_q-layer.7": 1477.1484, "encoder_q-layer.8": 878.0218, "encoder_q-layer.9": 596.58, "epoch": 0.78, "inbatch_neg_score": 0.3209, "inbatch_pos_score": 0.9614, "learning_rate": 1.1000000000000001e-05, "loss": 3.3618, "norm_diff": 0.1066, "norm_loss": 0.0, "num_token_doc": 66.7326, "num_token_overlap": 14.515, "num_token_query": 37.2906, "num_token_union": 65.3443, "num_word_context": 202.4241, "num_word_doc": 49.751, "num_word_query": 27.8975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2396.35, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3196, "query_norm": 1.3533, "queue_k_norm": 1.463, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2906, "sent_len_1": 66.7326, "sent_len_max_0": 128.0, "sent_len_max_1": 192.085, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3401, "doc_norm": 1.4609, "encoder_q-embeddings": 585.8879, "encoder_q-layer.0": 408.3764, "encoder_q-layer.1": 452.9002, "encoder_q-layer.10": 615.1253, "encoder_q-layer.11": 1508.631, "encoder_q-layer.2": 499.9394, "encoder_q-layer.3": 489.6203, "encoder_q-layer.4": 506.5027, "encoder_q-layer.5": 468.5596, "encoder_q-layer.6": 544.5513, "encoder_q-layer.7": 549.5995, "encoder_q-layer.8": 627.5972, "encoder_q-layer.9": 564.8121, "epoch": 0.78, "inbatch_neg_score": 0.3207, "inbatch_pos_score": 0.9697, "learning_rate": 1.0944444444444445e-05, "loss": 3.3401, "norm_diff": 0.1212, "norm_loss": 0.0, "num_token_doc": 67.0085, "num_token_overlap": 14.6442, "num_token_query": 37.4916, "num_token_union": 65.5854, "num_word_context": 202.8098, "num_word_doc": 50.0135, "num_word_query": 28.0639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1016.4962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3196, "query_norm": 1.3397, "queue_k_norm": 1.4617, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4916, "sent_len_1": 67.0085, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.5062, "stdk": 0.0487, "stdq": 0.0436, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3763, "doc_norm": 1.462, "encoder_q-embeddings": 537.5359, "encoder_q-layer.0": 355.0171, "encoder_q-layer.1": 373.4593, "encoder_q-layer.10": 564.0461, "encoder_q-layer.11": 1510.5538, "encoder_q-layer.2": 426.2266, "encoder_q-layer.3": 426.9426, "encoder_q-layer.4": 450.1064, "encoder_q-layer.5": 463.3889, "encoder_q-layer.6": 514.1285, "encoder_q-layer.7": 549.8853, "encoder_q-layer.8": 599.8624, "encoder_q-layer.9": 547.5283, "epoch": 0.78, "inbatch_neg_score": 0.3185, "inbatch_pos_score": 0.9727, "learning_rate": 1.088888888888889e-05, "loss": 3.3763, "norm_diff": 0.1006, "norm_loss": 0.0, "num_token_doc": 66.5422, "num_token_overlap": 14.6182, "num_token_query": 37.3843, "num_token_union": 65.2557, "num_word_context": 202.008, "num_word_doc": 49.6685, "num_word_query": 27.9628, "postclip_grad_norm": 1.0, "preclip_grad_norm": 987.0158, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3193, "query_norm": 1.3614, "queue_k_norm": 1.4621, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3843, "sent_len_1": 66.5422, "sent_len_max_0": 128.0, "sent_len_max_1": 187.4663, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3587, "doc_norm": 1.469, "encoder_q-embeddings": 5648.124, "encoder_q-layer.0": 4345.4355, "encoder_q-layer.1": 5131.4692, "encoder_q-layer.10": 1229.6084, "encoder_q-layer.11": 3204.0754, "encoder_q-layer.2": 6392.3853, "encoder_q-layer.3": 6585.0347, "encoder_q-layer.4": 8054.998, "encoder_q-layer.5": 9287.9072, "encoder_q-layer.6": 8687.8086, "encoder_q-layer.7": 6412.6304, "encoder_q-layer.8": 3712.7363, "encoder_q-layer.9": 1460.6548, "epoch": 0.79, "inbatch_neg_score": 0.3195, "inbatch_pos_score": 0.9624, "learning_rate": 1.0833333333333334e-05, "loss": 3.3587, "norm_diff": 0.1055, "norm_loss": 0.0, "num_token_doc": 66.7058, "num_token_overlap": 14.6292, "num_token_query": 37.367, "num_token_union": 65.3353, "num_word_context": 202.1692, "num_word_doc": 49.7768, "num_word_query": 27.9988, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8824.3921, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3198, "query_norm": 1.3635, "queue_k_norm": 1.4621, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.367, "sent_len_1": 66.7058, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8125, "stdk": 0.049, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3609, "doc_norm": 1.4662, "encoder_q-embeddings": 1040.4834, "encoder_q-layer.0": 708.8131, "encoder_q-layer.1": 735.1418, "encoder_q-layer.10": 1210.98, "encoder_q-layer.11": 3149.0183, "encoder_q-layer.2": 819.6788, "encoder_q-layer.3": 826.1669, "encoder_q-layer.4": 887.5594, "encoder_q-layer.5": 902.0888, "encoder_q-layer.6": 980.2561, "encoder_q-layer.7": 1081.3425, "encoder_q-layer.8": 1309.2095, "encoder_q-layer.9": 1201.1162, "epoch": 0.79, "inbatch_neg_score": 0.3231, "inbatch_pos_score": 0.9609, "learning_rate": 1.0777777777777778e-05, "loss": 3.3609, "norm_diff": 0.112, "norm_loss": 0.0, "num_token_doc": 66.7424, "num_token_overlap": 14.6484, "num_token_query": 37.4455, "num_token_union": 65.3834, "num_word_context": 202.6508, "num_word_doc": 49.8193, "num_word_query": 28.0567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1994.3243, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.322, "query_norm": 1.3543, "queue_k_norm": 1.4634, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4455, "sent_len_1": 66.7424, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4613, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.3768, "doc_norm": 1.4659, "encoder_q-embeddings": 1063.2313, "encoder_q-layer.0": 689.0756, "encoder_q-layer.1": 726.2006, "encoder_q-layer.10": 1289.8051, "encoder_q-layer.11": 3288.4299, "encoder_q-layer.2": 829.7416, "encoder_q-layer.3": 878.5861, "encoder_q-layer.4": 919.8901, "encoder_q-layer.5": 932.0891, "encoder_q-layer.6": 942.5846, "encoder_q-layer.7": 1061.043, "encoder_q-layer.8": 1334.3962, "encoder_q-layer.9": 1166.7666, "epoch": 0.79, "inbatch_neg_score": 0.3216, "inbatch_pos_score": 0.9375, "learning_rate": 1.0722222222222222e-05, "loss": 3.3768, "norm_diff": 0.1223, "norm_loss": 0.0, "num_token_doc": 67.0171, "num_token_overlap": 14.5574, "num_token_query": 37.2199, "num_token_union": 65.4784, "num_word_context": 202.712, "num_word_doc": 49.9786, "num_word_query": 27.8475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2097.1454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.321, "query_norm": 1.3436, "queue_k_norm": 1.4657, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2199, "sent_len_1": 67.0171, "sent_len_max_0": 128.0, "sent_len_max_1": 188.61, "stdk": 0.0489, "stdq": 0.0437, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.36, "doc_norm": 1.4627, "encoder_q-embeddings": 1133.5116, "encoder_q-layer.0": 762.8468, "encoder_q-layer.1": 719.2587, "encoder_q-layer.10": 1224.2211, "encoder_q-layer.11": 3115.0205, "encoder_q-layer.2": 783.975, "encoder_q-layer.3": 792.2725, "encoder_q-layer.4": 847.4871, "encoder_q-layer.5": 834.5775, "encoder_q-layer.6": 944.4816, "encoder_q-layer.7": 1058.4292, "encoder_q-layer.8": 1199.8964, "encoder_q-layer.9": 1103.0552, "epoch": 0.79, "inbatch_neg_score": 0.3228, "inbatch_pos_score": 0.98, "learning_rate": 1.0666666666666667e-05, "loss": 3.36, "norm_diff": 0.1068, "norm_loss": 0.0, "num_token_doc": 66.9446, "num_token_overlap": 14.6214, "num_token_query": 37.4161, "num_token_union": 65.5298, "num_word_context": 202.8256, "num_word_doc": 49.9179, "num_word_query": 28.0176, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1993.2556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3223, "query_norm": 1.3559, "queue_k_norm": 1.4638, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4161, "sent_len_1": 66.9446, "sent_len_max_0": 128.0, "sent_len_max_1": 190.03, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3824, "doc_norm": 1.4668, "encoder_q-embeddings": 1037.2986, "encoder_q-layer.0": 722.5908, "encoder_q-layer.1": 766.0241, "encoder_q-layer.10": 1165.7806, "encoder_q-layer.11": 2962.7417, "encoder_q-layer.2": 854.0811, "encoder_q-layer.3": 870.7861, "encoder_q-layer.4": 915.8026, "encoder_q-layer.5": 908.7193, "encoder_q-layer.6": 985.6865, "encoder_q-layer.7": 1090.5146, "encoder_q-layer.8": 1295.1831, "encoder_q-layer.9": 1150.55, "epoch": 0.79, "inbatch_neg_score": 0.3251, "inbatch_pos_score": 0.9785, "learning_rate": 1.0611111111111111e-05, "loss": 3.3824, "norm_diff": 0.1099, "norm_loss": 0.0, "num_token_doc": 66.9187, "num_token_overlap": 14.5592, "num_token_query": 37.1731, "num_token_union": 65.3872, "num_word_context": 202.5296, "num_word_doc": 49.9571, "num_word_query": 27.8, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1940.8187, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3242, "query_norm": 1.3569, "queue_k_norm": 1.4647, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1731, "sent_len_1": 66.9187, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.5762, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3763, "doc_norm": 1.4669, "encoder_q-embeddings": 1565.5046, "encoder_q-layer.0": 1100.9459, "encoder_q-layer.1": 1258.0146, "encoder_q-layer.10": 569.6113, "encoder_q-layer.11": 1547.0714, "encoder_q-layer.2": 1444.0764, "encoder_q-layer.3": 1636.9091, "encoder_q-layer.4": 1873.5945, "encoder_q-layer.5": 1599.8417, "encoder_q-layer.6": 1186.8092, "encoder_q-layer.7": 932.3819, "encoder_q-layer.8": 745.5633, "encoder_q-layer.9": 578.117, "epoch": 0.79, "inbatch_neg_score": 0.3219, "inbatch_pos_score": 0.9692, "learning_rate": 1.0555555555555555e-05, "loss": 3.3763, "norm_diff": 0.1163, "norm_loss": 0.0, "num_token_doc": 66.5933, "num_token_overlap": 14.5957, "num_token_query": 37.4166, "num_token_union": 65.3396, "num_word_context": 202.2171, "num_word_doc": 49.6897, "num_word_query": 28.0244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1989.2567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3218, "query_norm": 1.3506, "queue_k_norm": 1.4636, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4166, "sent_len_1": 66.5933, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6725, "stdk": 0.0489, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.3549, "doc_norm": 1.4572, "encoder_q-embeddings": 506.85, "encoder_q-layer.0": 343.5723, "encoder_q-layer.1": 368.3421, "encoder_q-layer.10": 607.726, "encoder_q-layer.11": 1529.7972, "encoder_q-layer.2": 440.1221, "encoder_q-layer.3": 447.5305, "encoder_q-layer.4": 475.841, "encoder_q-layer.5": 454.9101, "encoder_q-layer.6": 473.3231, "encoder_q-layer.7": 518.4567, "encoder_q-layer.8": 654.144, "encoder_q-layer.9": 567.596, "epoch": 0.79, "inbatch_neg_score": 0.3229, "inbatch_pos_score": 0.9893, "learning_rate": 1.05e-05, "loss": 3.3549, "norm_diff": 0.0934, "norm_loss": 0.0, "num_token_doc": 66.8583, "num_token_overlap": 14.5375, "num_token_query": 37.1939, "num_token_union": 65.3862, "num_word_context": 202.2518, "num_word_doc": 49.8204, "num_word_query": 27.8057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 997.5076, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3228, "query_norm": 1.3637, "queue_k_norm": 1.4662, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1939, "sent_len_1": 66.8583, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.445, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.365, "doc_norm": 1.4655, "encoder_q-embeddings": 626.6658, "encoder_q-layer.0": 428.1826, "encoder_q-layer.1": 477.6965, "encoder_q-layer.10": 619.1626, "encoder_q-layer.11": 1559.4323, "encoder_q-layer.2": 523.7469, "encoder_q-layer.3": 622.3109, "encoder_q-layer.4": 605.8861, "encoder_q-layer.5": 568.1431, "encoder_q-layer.6": 621.8633, "encoder_q-layer.7": 518.5444, "encoder_q-layer.8": 588.6769, "encoder_q-layer.9": 546.1795, "epoch": 0.79, "inbatch_neg_score": 0.3255, "inbatch_pos_score": 0.9609, "learning_rate": 1.0444444444444445e-05, "loss": 3.365, "norm_diff": 0.11, "norm_loss": 0.0, "num_token_doc": 66.8213, "num_token_overlap": 14.6373, "num_token_query": 37.5116, "num_token_union": 65.4362, "num_word_context": 202.6403, "num_word_doc": 49.8809, "num_word_query": 28.0782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1064.7785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.325, "query_norm": 1.3555, "queue_k_norm": 1.4644, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5116, "sent_len_1": 66.8213, "sent_len_max_0": 127.995, "sent_len_max_1": 188.0875, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3739, "doc_norm": 1.4636, "encoder_q-embeddings": 1287.1439, "encoder_q-layer.0": 971.1911, "encoder_q-layer.1": 878.6295, "encoder_q-layer.10": 609.3245, "encoder_q-layer.11": 1589.7795, "encoder_q-layer.2": 958.7799, "encoder_q-layer.3": 991.9387, "encoder_q-layer.4": 819.6087, "encoder_q-layer.5": 789.9731, "encoder_q-layer.6": 757.9907, "encoder_q-layer.7": 781.0426, "encoder_q-layer.8": 770.9483, "encoder_q-layer.9": 603.4792, "epoch": 0.79, "inbatch_neg_score": 0.3254, "inbatch_pos_score": 0.9937, "learning_rate": 1.038888888888889e-05, "loss": 3.3739, "norm_diff": 0.1101, "norm_loss": 0.0, "num_token_doc": 66.6489, "num_token_overlap": 14.6414, "num_token_query": 37.4036, "num_token_union": 65.3383, "num_word_context": 202.0435, "num_word_doc": 49.7359, "num_word_query": 27.996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1489.172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3257, "query_norm": 1.3535, "queue_k_norm": 1.4654, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4036, "sent_len_1": 66.6489, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.6675, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3687, "doc_norm": 1.4691, "encoder_q-embeddings": 778.7039, "encoder_q-layer.0": 590.015, "encoder_q-layer.1": 677.1636, "encoder_q-layer.10": 575.108, "encoder_q-layer.11": 1573.7671, "encoder_q-layer.2": 848.2839, "encoder_q-layer.3": 875.6711, "encoder_q-layer.4": 962.9724, "encoder_q-layer.5": 904.5535, "encoder_q-layer.6": 795.1884, "encoder_q-layer.7": 658.7513, "encoder_q-layer.8": 713.9621, "encoder_q-layer.9": 570.7205, "epoch": 0.79, "inbatch_neg_score": 0.3258, "inbatch_pos_score": 0.9639, "learning_rate": 1.0333333333333333e-05, "loss": 3.3687, "norm_diff": 0.1278, "norm_loss": 0.0, "num_token_doc": 66.6003, "num_token_overlap": 14.6162, "num_token_query": 37.4487, "num_token_union": 65.3402, "num_word_context": 202.5066, "num_word_doc": 49.739, "num_word_query": 28.0523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1316.5349, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3254, "query_norm": 1.3413, "queue_k_norm": 1.4669, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4487, "sent_len_1": 66.6003, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7237, "stdk": 0.0489, "stdq": 0.0435, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3483, "doc_norm": 1.4609, "encoder_q-embeddings": 1114.9232, "encoder_q-layer.0": 743.6119, "encoder_q-layer.1": 878.4594, "encoder_q-layer.10": 642.4376, "encoder_q-layer.11": 1518.9547, "encoder_q-layer.2": 1001.3593, "encoder_q-layer.3": 979.8128, "encoder_q-layer.4": 999.9636, "encoder_q-layer.5": 980.6797, "encoder_q-layer.6": 928.8003, "encoder_q-layer.7": 799.4073, "encoder_q-layer.8": 764.2108, "encoder_q-layer.9": 607.1149, "epoch": 0.8, "inbatch_neg_score": 0.3302, "inbatch_pos_score": 0.9697, "learning_rate": 1.0277777777777777e-05, "loss": 3.3483, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.9987, "num_token_overlap": 14.7524, "num_token_query": 37.7502, "num_token_union": 65.6026, "num_word_context": 202.4598, "num_word_doc": 49.9441, "num_word_query": 28.2849, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1433.3638, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3293, "query_norm": 1.345, "queue_k_norm": 1.4657, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.7502, "sent_len_1": 66.9987, "sent_len_max_0": 128.0, "sent_len_max_1": 190.905, "stdk": 0.0486, "stdq": 0.0435, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.3914, "doc_norm": 1.4636, "encoder_q-embeddings": 704.1854, "encoder_q-layer.0": 483.6652, "encoder_q-layer.1": 532.0744, "encoder_q-layer.10": 605.4569, "encoder_q-layer.11": 1629.5481, "encoder_q-layer.2": 597.3562, "encoder_q-layer.3": 617.423, "encoder_q-layer.4": 637.012, "encoder_q-layer.5": 616.0112, "encoder_q-layer.6": 640.6301, "encoder_q-layer.7": 663.2324, "encoder_q-layer.8": 696.1174, "encoder_q-layer.9": 592.132, "epoch": 0.8, "inbatch_neg_score": 0.3291, "inbatch_pos_score": 0.9644, "learning_rate": 1.0222222222222223e-05, "loss": 3.3914, "norm_diff": 0.1184, "norm_loss": 0.0, "num_token_doc": 66.9407, "num_token_overlap": 14.5914, "num_token_query": 37.3, "num_token_union": 65.4386, "num_word_context": 202.843, "num_word_doc": 49.9823, "num_word_query": 27.9071, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1165.0372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3293, "query_norm": 1.3451, "queue_k_norm": 1.4658, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3, "sent_len_1": 66.9407, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.9812, "stdk": 0.0487, "stdq": 0.0436, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.3543, "doc_norm": 1.4719, "encoder_q-embeddings": 525.8293, "encoder_q-layer.0": 350.83, "encoder_q-layer.1": 370.8012, "encoder_q-layer.10": 587.7173, "encoder_q-layer.11": 1558.1733, "encoder_q-layer.2": 427.1011, "encoder_q-layer.3": 426.2579, "encoder_q-layer.4": 449.4272, "encoder_q-layer.5": 451.3805, "encoder_q-layer.6": 487.4789, "encoder_q-layer.7": 549.052, "encoder_q-layer.8": 607.8947, "encoder_q-layer.9": 559.1405, "epoch": 0.8, "inbatch_neg_score": 0.3277, "inbatch_pos_score": 0.9829, "learning_rate": 1.0166666666666667e-05, "loss": 3.3543, "norm_diff": 0.1074, "norm_loss": 0.0, "num_token_doc": 66.9138, "num_token_overlap": 14.6531, "num_token_query": 37.4411, "num_token_union": 65.4733, "num_word_context": 202.6067, "num_word_doc": 49.934, "num_word_query": 28.0508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 995.1388, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3274, "query_norm": 1.3644, "queue_k_norm": 1.4662, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4411, "sent_len_1": 66.9138, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9837, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3445, "doc_norm": 1.4752, "encoder_q-embeddings": 627.3726, "encoder_q-layer.0": 413.0898, "encoder_q-layer.1": 445.8239, "encoder_q-layer.10": 621.2513, "encoder_q-layer.11": 1625.9729, "encoder_q-layer.2": 484.7416, "encoder_q-layer.3": 495.5496, "encoder_q-layer.4": 513.3882, "encoder_q-layer.5": 507.6099, "encoder_q-layer.6": 547.4785, "encoder_q-layer.7": 547.9846, "encoder_q-layer.8": 656.0604, "encoder_q-layer.9": 588.4368, "epoch": 0.8, "inbatch_neg_score": 0.3309, "inbatch_pos_score": 1.001, "learning_rate": 1.0111111111111111e-05, "loss": 3.3445, "norm_diff": 0.1069, "norm_loss": 0.0, "num_token_doc": 66.8833, "num_token_overlap": 14.6655, "num_token_query": 37.5136, "num_token_union": 65.4858, "num_word_context": 202.1901, "num_word_doc": 49.8951, "num_word_query": 28.093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1097.8827, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3315, "query_norm": 1.3683, "queue_k_norm": 1.4669, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5136, "sent_len_1": 66.8833, "sent_len_max_0": 127.995, "sent_len_max_1": 189.6887, "stdk": 0.0491, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.373, "doc_norm": 1.4638, "encoder_q-embeddings": 856.7453, "encoder_q-layer.0": 603.6396, "encoder_q-layer.1": 641.9209, "encoder_q-layer.10": 598.7659, "encoder_q-layer.11": 1582.3427, "encoder_q-layer.2": 779.2101, "encoder_q-layer.3": 830.8696, "encoder_q-layer.4": 913.7501, "encoder_q-layer.5": 1052.0314, "encoder_q-layer.6": 1028.1461, "encoder_q-layer.7": 834.7271, "encoder_q-layer.8": 773.7355, "encoder_q-layer.9": 584.5962, "epoch": 0.8, "inbatch_neg_score": 0.3315, "inbatch_pos_score": 0.9814, "learning_rate": 1.0055555555555555e-05, "loss": 3.373, "norm_diff": 0.099, "norm_loss": 0.0, "num_token_doc": 66.8042, "num_token_overlap": 14.5131, "num_token_query": 37.1874, "num_token_union": 65.3916, "num_word_context": 202.4535, "num_word_doc": 49.86, "num_word_query": 27.8276, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1358.1759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3303, "query_norm": 1.3648, "queue_k_norm": 1.4694, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1874, "sent_len_1": 66.8042, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6012, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3589, "doc_norm": 1.467, "encoder_q-embeddings": 2851.3135, "encoder_q-layer.0": 2256.1792, "encoder_q-layer.1": 2635.4519, "encoder_q-layer.10": 613.6079, "encoder_q-layer.11": 1606.3065, "encoder_q-layer.2": 3287.9824, "encoder_q-layer.3": 2770.6143, "encoder_q-layer.4": 2723.0659, "encoder_q-layer.5": 2451.8472, "encoder_q-layer.6": 2404.2737, "encoder_q-layer.7": 2476.0574, "encoder_q-layer.8": 1982.0638, "encoder_q-layer.9": 746.2161, "epoch": 0.8, "inbatch_neg_score": 0.3285, "inbatch_pos_score": 0.9707, "learning_rate": 1e-05, "loss": 3.3589, "norm_diff": 0.0965, "norm_loss": 0.0, "num_token_doc": 66.749, "num_token_overlap": 14.5606, "num_token_query": 37.2379, "num_token_union": 65.334, "num_word_context": 202.1193, "num_word_doc": 49.8235, "num_word_query": 27.8998, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3540.8427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3279, "query_norm": 1.3705, "queue_k_norm": 1.4685, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2379, "sent_len_1": 66.749, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2925, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3536, "doc_norm": 1.4648, "encoder_q-embeddings": 1033.7982, "encoder_q-layer.0": 749.4906, "encoder_q-layer.1": 866.0677, "encoder_q-layer.10": 569.2695, "encoder_q-layer.11": 1506.1548, "encoder_q-layer.2": 993.1152, "encoder_q-layer.3": 1013.0967, "encoder_q-layer.4": 1143.9467, "encoder_q-layer.5": 1084.4604, "encoder_q-layer.6": 1009.8252, "encoder_q-layer.7": 888.9249, "encoder_q-layer.8": 698.3893, "encoder_q-layer.9": 585.8485, "epoch": 0.8, "inbatch_neg_score": 0.332, "inbatch_pos_score": 0.9834, "learning_rate": 9.944444444444445e-06, "loss": 3.3536, "norm_diff": 0.0908, "norm_loss": 0.0, "num_token_doc": 66.6763, "num_token_overlap": 14.6193, "num_token_query": 37.4982, "num_token_union": 65.4247, "num_word_context": 202.3716, "num_word_doc": 49.7769, "num_word_query": 28.0927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1465.9118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3325, "query_norm": 1.374, "queue_k_norm": 1.4671, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4982, "sent_len_1": 66.6763, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.8487, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3599, "doc_norm": 1.4714, "encoder_q-embeddings": 757.9521, "encoder_q-layer.0": 533.1543, "encoder_q-layer.1": 589.054, "encoder_q-layer.10": 605.2166, "encoder_q-layer.11": 1568.9849, "encoder_q-layer.2": 700.444, "encoder_q-layer.3": 713.4686, "encoder_q-layer.4": 768.5721, "encoder_q-layer.5": 756.9834, "encoder_q-layer.6": 767.3976, "encoder_q-layer.7": 714.9494, "encoder_q-layer.8": 684.2045, "encoder_q-layer.9": 600.1786, "epoch": 0.8, "inbatch_neg_score": 0.331, "inbatch_pos_score": 0.9814, "learning_rate": 9.888888888888889e-06, "loss": 3.3599, "norm_diff": 0.1003, "norm_loss": 0.0, "num_token_doc": 66.6833, "num_token_overlap": 14.6909, "num_token_query": 37.599, "num_token_union": 65.3967, "num_word_context": 202.4871, "num_word_doc": 49.7436, "num_word_query": 28.1755, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1193.507, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3296, "query_norm": 1.3711, "queue_k_norm": 1.4678, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.599, "sent_len_1": 66.6833, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.2375, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.361, "doc_norm": 1.4684, "encoder_q-embeddings": 814.5799, "encoder_q-layer.0": 593.3513, "encoder_q-layer.1": 632.2681, "encoder_q-layer.10": 556.8636, "encoder_q-layer.11": 1511.833, "encoder_q-layer.2": 559.9514, "encoder_q-layer.3": 497.0417, "encoder_q-layer.4": 499.3992, "encoder_q-layer.5": 514.3284, "encoder_q-layer.6": 534.6578, "encoder_q-layer.7": 561.042, "encoder_q-layer.8": 629.324, "encoder_q-layer.9": 566.8161, "epoch": 0.8, "inbatch_neg_score": 0.3307, "inbatch_pos_score": 0.9731, "learning_rate": 9.833333333333333e-06, "loss": 3.361, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.8398, "num_token_overlap": 14.6374, "num_token_query": 37.4877, "num_token_union": 65.4701, "num_word_context": 202.5819, "num_word_doc": 49.8499, "num_word_query": 28.0794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1096.9658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3315, "query_norm": 1.355, "queue_k_norm": 1.4671, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4877, "sent_len_1": 66.8398, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.6875, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3441, "doc_norm": 1.4722, "encoder_q-embeddings": 740.9857, "encoder_q-layer.0": 501.3454, "encoder_q-layer.1": 499.4093, "encoder_q-layer.10": 603.2088, "encoder_q-layer.11": 1580.8724, "encoder_q-layer.2": 573.074, "encoder_q-layer.3": 570.6891, "encoder_q-layer.4": 588.4304, "encoder_q-layer.5": 517.2248, "encoder_q-layer.6": 537.6026, "encoder_q-layer.7": 591.6829, "encoder_q-layer.8": 683.8839, "encoder_q-layer.9": 595.2272, "epoch": 0.8, "inbatch_neg_score": 0.3315, "inbatch_pos_score": 0.9629, "learning_rate": 9.777777777777779e-06, "loss": 3.3441, "norm_diff": 0.1179, "norm_loss": 0.0, "num_token_doc": 66.9597, "num_token_overlap": 14.6016, "num_token_query": 37.2589, "num_token_union": 65.4359, "num_word_context": 202.268, "num_word_doc": 49.965, "num_word_query": 27.8795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1110.5571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3315, "query_norm": 1.3544, "queue_k_norm": 1.4676, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2589, "sent_len_1": 66.9597, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.37, "stdk": 0.0489, "stdq": 0.0439, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.363, "doc_norm": 1.4664, "encoder_q-embeddings": 504.6415, "encoder_q-layer.0": 340.7856, "encoder_q-layer.1": 361.9933, "encoder_q-layer.10": 617.0222, "encoder_q-layer.11": 1634.9089, "encoder_q-layer.2": 390.7141, "encoder_q-layer.3": 403.1573, "encoder_q-layer.4": 430.2748, "encoder_q-layer.5": 440.3961, "encoder_q-layer.6": 472.4, "encoder_q-layer.7": 541.3605, "encoder_q-layer.8": 636.6984, "encoder_q-layer.9": 600.1575, "epoch": 0.81, "inbatch_neg_score": 0.3385, "inbatch_pos_score": 0.9702, "learning_rate": 9.722222222222223e-06, "loss": 3.363, "norm_diff": 0.0969, "norm_loss": 0.0, "num_token_doc": 66.3654, "num_token_overlap": 14.556, "num_token_query": 37.3242, "num_token_union": 65.1861, "num_word_context": 202.0005, "num_word_doc": 49.5363, "num_word_query": 27.9505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1039.8113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3374, "query_norm": 1.3695, "queue_k_norm": 1.4675, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3242, "sent_len_1": 66.3654, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.9087, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3553, "doc_norm": 1.4687, "encoder_q-embeddings": 736.4651, "encoder_q-layer.0": 511.5573, "encoder_q-layer.1": 541.3595, "encoder_q-layer.10": 634.7629, "encoder_q-layer.11": 1545.8247, "encoder_q-layer.2": 614.457, "encoder_q-layer.3": 721.0598, "encoder_q-layer.4": 802.9307, "encoder_q-layer.5": 756.9169, "encoder_q-layer.6": 761.3675, "encoder_q-layer.7": 641.6448, "encoder_q-layer.8": 661.0736, "encoder_q-layer.9": 585.1249, "epoch": 0.81, "inbatch_neg_score": 0.3393, "inbatch_pos_score": 0.9878, "learning_rate": 9.666666666666667e-06, "loss": 3.3553, "norm_diff": 0.1005, "norm_loss": 0.0, "num_token_doc": 66.9013, "num_token_overlap": 14.5634, "num_token_query": 37.2858, "num_token_union": 65.4872, "num_word_context": 202.5706, "num_word_doc": 49.921, "num_word_query": 27.9125, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1200.1027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3386, "query_norm": 1.3682, "queue_k_norm": 1.4685, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2858, "sent_len_1": 66.9013, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.97, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3568, "doc_norm": 1.4718, "encoder_q-embeddings": 543.0933, "encoder_q-layer.0": 370.529, "encoder_q-layer.1": 387.2409, "encoder_q-layer.10": 590.2603, "encoder_q-layer.11": 1544.2189, "encoder_q-layer.2": 448.6708, "encoder_q-layer.3": 477.1466, "encoder_q-layer.4": 507.9551, "encoder_q-layer.5": 523.3607, "encoder_q-layer.6": 553.2292, "encoder_q-layer.7": 550.7302, "encoder_q-layer.8": 631.4507, "encoder_q-layer.9": 569.6815, "epoch": 0.81, "inbatch_neg_score": 0.3412, "inbatch_pos_score": 1.0049, "learning_rate": 9.61111111111111e-06, "loss": 3.3568, "norm_diff": 0.0861, "norm_loss": 0.0, "num_token_doc": 66.6809, "num_token_overlap": 14.5768, "num_token_query": 37.335, "num_token_union": 65.3133, "num_word_context": 202.1561, "num_word_doc": 49.7616, "num_word_query": 27.9523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1008.4736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3413, "query_norm": 1.3857, "queue_k_norm": 1.4702, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.335, "sent_len_1": 66.6809, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.3487, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.3574, "doc_norm": 1.4711, "encoder_q-embeddings": 531.5121, "encoder_q-layer.0": 355.6072, "encoder_q-layer.1": 368.0106, "encoder_q-layer.10": 657.7283, "encoder_q-layer.11": 1600.9983, "encoder_q-layer.2": 421.2885, "encoder_q-layer.3": 433.1502, "encoder_q-layer.4": 491.4932, "encoder_q-layer.5": 512.9857, "encoder_q-layer.6": 543.7278, "encoder_q-layer.7": 545.7081, "encoder_q-layer.8": 631.4795, "encoder_q-layer.9": 615.4046, "epoch": 0.81, "inbatch_neg_score": 0.3435, "inbatch_pos_score": 1.0156, "learning_rate": 9.555555555555556e-06, "loss": 3.3574, "norm_diff": 0.0958, "norm_loss": 0.0, "num_token_doc": 66.9591, "num_token_overlap": 14.663, "num_token_query": 37.5306, "num_token_union": 65.5408, "num_word_context": 202.2031, "num_word_doc": 49.9229, "num_word_query": 28.0985, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1030.3422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3423, "query_norm": 1.3753, "queue_k_norm": 1.4711, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5306, "sent_len_1": 66.9591, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6813, "stdk": 0.0488, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3763, "doc_norm": 1.4728, "encoder_q-embeddings": 1612.3193, "encoder_q-layer.0": 1106.5002, "encoder_q-layer.1": 1362.0659, "encoder_q-layer.10": 600.2817, "encoder_q-layer.11": 1620.72, "encoder_q-layer.2": 1753.5724, "encoder_q-layer.3": 1810.4851, "encoder_q-layer.4": 2157.1958, "encoder_q-layer.5": 2031.9241, "encoder_q-layer.6": 1684.6825, "encoder_q-layer.7": 1231.3733, "encoder_q-layer.8": 906.9037, "encoder_q-layer.9": 566.1707, "epoch": 0.81, "inbatch_neg_score": 0.3457, "inbatch_pos_score": 0.9961, "learning_rate": 9.5e-06, "loss": 3.3763, "norm_diff": 0.0958, "norm_loss": 0.0, "num_token_doc": 66.787, "num_token_overlap": 14.5504, "num_token_query": 37.4429, "num_token_union": 65.4742, "num_word_context": 202.339, "num_word_doc": 49.8083, "num_word_query": 28.0492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2216.1427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3469, "query_norm": 1.377, "queue_k_norm": 1.4689, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4429, "sent_len_1": 66.787, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.985, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.3611, "doc_norm": 1.471, "encoder_q-embeddings": 1536.0205, "encoder_q-layer.0": 1058.0045, "encoder_q-layer.1": 1301.9187, "encoder_q-layer.10": 1227.4087, "encoder_q-layer.11": 3298.8794, "encoder_q-layer.2": 1537.576, "encoder_q-layer.3": 1527.2504, "encoder_q-layer.4": 1640.6882, "encoder_q-layer.5": 1467.119, "encoder_q-layer.6": 1432.7097, "encoder_q-layer.7": 1259.675, "encoder_q-layer.8": 1335.0959, "encoder_q-layer.9": 1179.8153, "epoch": 0.81, "inbatch_neg_score": 0.3482, "inbatch_pos_score": 0.9819, "learning_rate": 9.444444444444445e-06, "loss": 3.3611, "norm_diff": 0.1042, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 14.5988, "num_token_query": 37.4506, "num_token_union": 65.4068, "num_word_context": 202.4566, "num_word_doc": 49.8431, "num_word_query": 28.0417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2527.5833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3472, "query_norm": 1.3668, "queue_k_norm": 1.4705, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4506, "sent_len_1": 66.7773, "sent_len_max_0": 128.0, "sent_len_max_1": 192.05, "stdk": 0.0488, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.364, "doc_norm": 1.465, "encoder_q-embeddings": 1580.9606, "encoder_q-layer.0": 1243.5917, "encoder_q-layer.1": 1460.3547, "encoder_q-layer.10": 1174.4161, "encoder_q-layer.11": 2958.0273, "encoder_q-layer.2": 1806.0447, "encoder_q-layer.3": 1823.366, "encoder_q-layer.4": 1699.8883, "encoder_q-layer.5": 1493.2911, "encoder_q-layer.6": 1538.4241, "encoder_q-layer.7": 1315.7544, "encoder_q-layer.8": 1298.4036, "encoder_q-layer.9": 1146.5243, "epoch": 0.81, "inbatch_neg_score": 0.3485, "inbatch_pos_score": 0.9937, "learning_rate": 9.388888888888889e-06, "loss": 3.364, "norm_diff": 0.0907, "norm_loss": 0.0, "num_token_doc": 66.7784, "num_token_overlap": 14.5668, "num_token_query": 37.2751, "num_token_union": 65.352, "num_word_context": 202.0038, "num_word_doc": 49.8178, "num_word_query": 27.914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2525.2157, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3481, "query_norm": 1.3743, "queue_k_norm": 1.4714, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2751, "sent_len_1": 66.7784, "sent_len_max_0": 128.0, "sent_len_max_1": 190.535, "stdk": 0.0485, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3609, "doc_norm": 1.4807, "encoder_q-embeddings": 2054.7617, "encoder_q-layer.0": 1360.4225, "encoder_q-layer.1": 1524.6373, "encoder_q-layer.10": 1149.5197, "encoder_q-layer.11": 2967.5837, "encoder_q-layer.2": 1791.0741, "encoder_q-layer.3": 2021.704, "encoder_q-layer.4": 2036.4442, "encoder_q-layer.5": 1888.6399, "encoder_q-layer.6": 1903.7262, "encoder_q-layer.7": 1884.2578, "encoder_q-layer.8": 1778.3594, "encoder_q-layer.9": 1174.2957, "epoch": 0.81, "inbatch_neg_score": 0.3524, "inbatch_pos_score": 1.0234, "learning_rate": 9.333333333333334e-06, "loss": 3.3609, "norm_diff": 0.1008, "norm_loss": 0.0, "num_token_doc": 66.7465, "num_token_overlap": 14.592, "num_token_query": 37.3362, "num_token_union": 65.3402, "num_word_context": 201.9729, "num_word_doc": 49.7946, "num_word_query": 27.9623, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2897.2093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3525, "query_norm": 1.3799, "queue_k_norm": 1.4717, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3362, "sent_len_1": 66.7465, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6662, "stdk": 0.0491, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.3528, "doc_norm": 1.4681, "encoder_q-embeddings": 1013.5601, "encoder_q-layer.0": 681.3225, "encoder_q-layer.1": 681.3823, "encoder_q-layer.10": 1205.8826, "encoder_q-layer.11": 3138.3547, "encoder_q-layer.2": 763.249, "encoder_q-layer.3": 768.3534, "encoder_q-layer.4": 817.9583, "encoder_q-layer.5": 843.7731, "encoder_q-layer.6": 947.0864, "encoder_q-layer.7": 1115.1343, "encoder_q-layer.8": 1322.1874, "encoder_q-layer.9": 1198.5967, "epoch": 0.81, "inbatch_neg_score": 0.3536, "inbatch_pos_score": 0.9873, "learning_rate": 9.277777777777778e-06, "loss": 3.3528, "norm_diff": 0.0868, "norm_loss": 0.0, "num_token_doc": 66.8035, "num_token_overlap": 14.539, "num_token_query": 37.1027, "num_token_union": 65.2988, "num_word_context": 202.0135, "num_word_doc": 49.847, "num_word_query": 27.7738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1993.8514, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3555, "query_norm": 1.3813, "queue_k_norm": 1.473, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1027, "sent_len_1": 66.8035, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6438, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3506, "doc_norm": 1.475, "encoder_q-embeddings": 1094.9791, "encoder_q-layer.0": 766.5468, "encoder_q-layer.1": 796.4825, "encoder_q-layer.10": 1208.2888, "encoder_q-layer.11": 3071.1392, "encoder_q-layer.2": 944.9586, "encoder_q-layer.3": 960.6557, "encoder_q-layer.4": 1009.2301, "encoder_q-layer.5": 1022.6888, "encoder_q-layer.6": 1037.8325, "encoder_q-layer.7": 1098.0023, "encoder_q-layer.8": 1276.8468, "encoder_q-layer.9": 1167.9036, "epoch": 0.81, "inbatch_neg_score": 0.3533, "inbatch_pos_score": 1.0244, "learning_rate": 9.222222222222222e-06, "loss": 3.3506, "norm_diff": 0.0885, "norm_loss": 0.0, "num_token_doc": 67.0363, "num_token_overlap": 14.6112, "num_token_query": 37.3711, "num_token_union": 65.5216, "num_word_context": 202.4025, "num_word_doc": 49.9932, "num_word_query": 27.9863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2017.6006, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.353, "query_norm": 1.3865, "queue_k_norm": 1.4742, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3711, "sent_len_1": 67.0363, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0588, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3428, "doc_norm": 1.473, "encoder_q-embeddings": 1048.4259, "encoder_q-layer.0": 704.3588, "encoder_q-layer.1": 743.6526, "encoder_q-layer.10": 1219.5369, "encoder_q-layer.11": 3380.814, "encoder_q-layer.2": 817.6828, "encoder_q-layer.3": 860.5802, "encoder_q-layer.4": 895.1747, "encoder_q-layer.5": 899.9283, "encoder_q-layer.6": 1016.2956, "encoder_q-layer.7": 1057.54, "encoder_q-layer.8": 1229.8633, "encoder_q-layer.9": 1132.8224, "epoch": 0.82, "inbatch_neg_score": 0.3583, "inbatch_pos_score": 0.9927, "learning_rate": 9.166666666666666e-06, "loss": 3.3428, "norm_diff": 0.109, "norm_loss": 0.0, "num_token_doc": 66.6603, "num_token_overlap": 14.6674, "num_token_query": 37.4754, "num_token_union": 65.3209, "num_word_context": 201.8701, "num_word_doc": 49.7211, "num_word_query": 28.0743, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2096.3825, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3567, "query_norm": 1.3641, "queue_k_norm": 1.4722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4754, "sent_len_1": 66.6603, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.8738, "stdk": 0.0487, "stdq": 0.0439, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3531, "doc_norm": 1.4855, "encoder_q-embeddings": 1100.1116, "encoder_q-layer.0": 736.8114, "encoder_q-layer.1": 780.7442, "encoder_q-layer.10": 1213.7039, "encoder_q-layer.11": 3234.3462, "encoder_q-layer.2": 884.2753, "encoder_q-layer.3": 888.8771, "encoder_q-layer.4": 938.8492, "encoder_q-layer.5": 1001.6158, "encoder_q-layer.6": 1126.7383, "encoder_q-layer.7": 1148.9227, "encoder_q-layer.8": 1321.1799, "encoder_q-layer.9": 1207.827, "epoch": 0.82, "inbatch_neg_score": 0.3542, "inbatch_pos_score": 1.0068, "learning_rate": 9.111111111111112e-06, "loss": 3.3531, "norm_diff": 0.1138, "norm_loss": 0.0, "num_token_doc": 66.7993, "num_token_overlap": 14.6306, "num_token_query": 37.4763, "num_token_union": 65.4323, "num_word_context": 202.3553, "num_word_doc": 49.8294, "num_word_query": 28.0614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2097.2412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3552, "query_norm": 1.3717, "queue_k_norm": 1.4752, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4763, "sent_len_1": 66.7993, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8575, "stdk": 0.0492, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3415, "doc_norm": 1.4764, "encoder_q-embeddings": 1281.8414, "encoder_q-layer.0": 937.9027, "encoder_q-layer.1": 996.1969, "encoder_q-layer.10": 1262.5719, "encoder_q-layer.11": 3063.1597, "encoder_q-layer.2": 1126.3605, "encoder_q-layer.3": 1139.1738, "encoder_q-layer.4": 1107.9192, "encoder_q-layer.5": 1061.4785, "encoder_q-layer.6": 1155.9446, "encoder_q-layer.7": 1239.155, "encoder_q-layer.8": 1300.0369, "encoder_q-layer.9": 1163.4897, "epoch": 0.82, "inbatch_neg_score": 0.3563, "inbatch_pos_score": 1.0176, "learning_rate": 9.055555555555556e-06, "loss": 3.3415, "norm_diff": 0.108, "norm_loss": 0.0, "num_token_doc": 66.8698, "num_token_overlap": 14.6459, "num_token_query": 37.3683, "num_token_union": 65.4206, "num_word_context": 202.1336, "num_word_doc": 49.8545, "num_word_query": 27.9786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2150.0759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3564, "query_norm": 1.3684, "queue_k_norm": 1.4757, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3683, "sent_len_1": 66.8698, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4725, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3657, "doc_norm": 1.4754, "encoder_q-embeddings": 920.0298, "encoder_q-layer.0": 627.7829, "encoder_q-layer.1": 665.2889, "encoder_q-layer.10": 1338.6342, "encoder_q-layer.11": 3109.9141, "encoder_q-layer.2": 745.5815, "encoder_q-layer.3": 745.319, "encoder_q-layer.4": 767.3468, "encoder_q-layer.5": 774.9888, "encoder_q-layer.6": 914.58, "encoder_q-layer.7": 1004.134, "encoder_q-layer.8": 1250.1512, "encoder_q-layer.9": 1142.2417, "epoch": 0.82, "inbatch_neg_score": 0.3628, "inbatch_pos_score": 1.001, "learning_rate": 9e-06, "loss": 3.3657, "norm_diff": 0.1216, "norm_loss": 0.0, "num_token_doc": 66.7796, "num_token_overlap": 14.6226, "num_token_query": 37.3926, "num_token_union": 65.4167, "num_word_context": 202.6107, "num_word_doc": 49.8425, "num_word_query": 28.005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1942.1351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3618, "query_norm": 1.3538, "queue_k_norm": 1.4761, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3926, "sent_len_1": 66.7796, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.2163, "stdk": 0.0488, "stdq": 0.0434, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3493, "doc_norm": 1.4738, "encoder_q-embeddings": 1086.2662, "encoder_q-layer.0": 777.9816, "encoder_q-layer.1": 820.3723, "encoder_q-layer.10": 1151.0726, "encoder_q-layer.11": 3095.4634, "encoder_q-layer.2": 959.8345, "encoder_q-layer.3": 991.6874, "encoder_q-layer.4": 955.0814, "encoder_q-layer.5": 936.9191, "encoder_q-layer.6": 1068.4019, "encoder_q-layer.7": 1245.5028, "encoder_q-layer.8": 1271.1288, "encoder_q-layer.9": 1115.0536, "epoch": 0.82, "inbatch_neg_score": 0.3621, "inbatch_pos_score": 1.001, "learning_rate": 8.944444444444444e-06, "loss": 3.3493, "norm_diff": 0.1066, "norm_loss": 0.0, "num_token_doc": 66.6591, "num_token_overlap": 14.5993, "num_token_query": 37.4111, "num_token_union": 65.3479, "num_word_context": 202.1168, "num_word_doc": 49.7503, "num_word_query": 28.0166, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2059.6502, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3633, "query_norm": 1.3672, "queue_k_norm": 1.4752, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4111, "sent_len_1": 66.6591, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.62, "stdk": 0.0488, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3283, "doc_norm": 1.4711, "encoder_q-embeddings": 1578.0265, "encoder_q-layer.0": 1146.0972, "encoder_q-layer.1": 1220.6371, "encoder_q-layer.10": 1241.9626, "encoder_q-layer.11": 3175.8149, "encoder_q-layer.2": 1334.8512, "encoder_q-layer.3": 1288.0093, "encoder_q-layer.4": 1383.6798, "encoder_q-layer.5": 1233.2466, "encoder_q-layer.6": 1342.0818, "encoder_q-layer.7": 1503.4659, "encoder_q-layer.8": 1398.6401, "encoder_q-layer.9": 1176.162, "epoch": 0.82, "inbatch_neg_score": 0.3628, "inbatch_pos_score": 1.0029, "learning_rate": 8.88888888888889e-06, "loss": 3.3283, "norm_diff": 0.0903, "norm_loss": 0.0, "num_token_doc": 67.0622, "num_token_overlap": 14.6759, "num_token_query": 37.4674, "num_token_union": 65.522, "num_word_context": 202.8653, "num_word_doc": 50.0871, "num_word_query": 28.0911, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2403.6297, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3633, "query_norm": 1.3808, "queue_k_norm": 1.4789, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4674, "sent_len_1": 67.0622, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1238, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3543, "doc_norm": 1.4793, "encoder_q-embeddings": 1021.4795, "encoder_q-layer.0": 653.0995, "encoder_q-layer.1": 659.0086, "encoder_q-layer.10": 1218.1561, "encoder_q-layer.11": 3162.5615, "encoder_q-layer.2": 718.9222, "encoder_q-layer.3": 747.6566, "encoder_q-layer.4": 762.939, "encoder_q-layer.5": 783.1917, "encoder_q-layer.6": 861.3879, "encoder_q-layer.7": 972.6848, "encoder_q-layer.8": 1299.1228, "encoder_q-layer.9": 1141.219, "epoch": 0.82, "inbatch_neg_score": 0.3641, "inbatch_pos_score": 1.002, "learning_rate": 8.833333333333334e-06, "loss": 3.3543, "norm_diff": 0.1171, "norm_loss": 0.0, "num_token_doc": 66.8494, "num_token_overlap": 14.6453, "num_token_query": 37.4308, "num_token_union": 65.4449, "num_word_context": 202.6341, "num_word_doc": 49.8594, "num_word_query": 28.0222, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1959.9529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3647, "query_norm": 1.3623, "queue_k_norm": 1.4794, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4308, "sent_len_1": 66.8494, "sent_len_max_0": 128.0, "sent_len_max_1": 190.345, "stdk": 0.0489, "stdq": 0.0438, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3577, "doc_norm": 1.479, "encoder_q-embeddings": 1127.973, "encoder_q-layer.0": 763.3211, "encoder_q-layer.1": 814.1202, "encoder_q-layer.10": 1252.8007, "encoder_q-layer.11": 3191.9058, "encoder_q-layer.2": 938.3428, "encoder_q-layer.3": 1035.9963, "encoder_q-layer.4": 1041.9918, "encoder_q-layer.5": 1065.9708, "encoder_q-layer.6": 1079.8048, "encoder_q-layer.7": 1115.4307, "encoder_q-layer.8": 1239.1992, "encoder_q-layer.9": 1137.6312, "epoch": 0.82, "inbatch_neg_score": 0.3652, "inbatch_pos_score": 1.0078, "learning_rate": 8.777777777777778e-06, "loss": 3.3577, "norm_diff": 0.1093, "norm_loss": 0.0, "num_token_doc": 66.8578, "num_token_overlap": 14.6413, "num_token_query": 37.4679, "num_token_union": 65.4736, "num_word_context": 202.328, "num_word_doc": 49.8618, "num_word_query": 28.073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.3302, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.365, "query_norm": 1.3697, "queue_k_norm": 1.4772, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4679, "sent_len_1": 66.8578, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0325, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3651, "doc_norm": 1.4792, "encoder_q-embeddings": 990.215, "encoder_q-layer.0": 680.5499, "encoder_q-layer.1": 733.5684, "encoder_q-layer.10": 1164.9723, "encoder_q-layer.11": 3160.5674, "encoder_q-layer.2": 802.645, "encoder_q-layer.3": 813.9026, "encoder_q-layer.4": 885.2133, "encoder_q-layer.5": 902.6725, "encoder_q-layer.6": 1000.0963, "encoder_q-layer.7": 1070.4288, "encoder_q-layer.8": 1255.6206, "encoder_q-layer.9": 1128.2321, "epoch": 0.82, "inbatch_neg_score": 0.3658, "inbatch_pos_score": 1.0176, "learning_rate": 8.722222222222224e-06, "loss": 3.3651, "norm_diff": 0.1142, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 14.618, "num_token_query": 37.3014, "num_token_union": 65.2692, "num_word_context": 202.3511, "num_word_doc": 49.7642, "num_word_query": 27.9007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2012.8901, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3657, "query_norm": 1.365, "queue_k_norm": 1.4789, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3014, "sent_len_1": 66.6695, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1275, "stdk": 0.0488, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3692, "doc_norm": 1.4785, "encoder_q-embeddings": 1396.8716, "encoder_q-layer.0": 936.3965, "encoder_q-layer.1": 1025.3556, "encoder_q-layer.10": 1195.5853, "encoder_q-layer.11": 3031.9102, "encoder_q-layer.2": 1186.6158, "encoder_q-layer.3": 1232.1196, "encoder_q-layer.4": 1333.0354, "encoder_q-layer.5": 1483.913, "encoder_q-layer.6": 1440.0413, "encoder_q-layer.7": 1305.4188, "encoder_q-layer.8": 1367.9092, "encoder_q-layer.9": 1092.4373, "epoch": 0.82, "inbatch_neg_score": 0.3687, "inbatch_pos_score": 1.0166, "learning_rate": 8.666666666666668e-06, "loss": 3.3692, "norm_diff": 0.0969, "norm_loss": 0.0, "num_token_doc": 66.6127, "num_token_overlap": 14.5873, "num_token_query": 37.2802, "num_token_union": 65.2194, "num_word_context": 202.2139, "num_word_doc": 49.6657, "num_word_query": 27.897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2266.2297, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3682, "query_norm": 1.3817, "queue_k_norm": 1.4782, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2802, "sent_len_1": 66.6127, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9613, "stdk": 0.0488, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.37, "doc_norm": 1.4767, "encoder_q-embeddings": 2352.7766, "encoder_q-layer.0": 1550.6311, "encoder_q-layer.1": 1890.9961, "encoder_q-layer.10": 1205.5244, "encoder_q-layer.11": 3059.7576, "encoder_q-layer.2": 2236.2861, "encoder_q-layer.3": 2313.9695, "encoder_q-layer.4": 2410.3523, "encoder_q-layer.5": 2582.9407, "encoder_q-layer.6": 2521.5183, "encoder_q-layer.7": 2305.3591, "encoder_q-layer.8": 1735.139, "encoder_q-layer.9": 1182.2437, "epoch": 0.82, "inbatch_neg_score": 0.3655, "inbatch_pos_score": 1.0127, "learning_rate": 8.611111111111112e-06, "loss": 3.37, "norm_diff": 0.104, "norm_loss": 0.0, "num_token_doc": 66.7831, "num_token_overlap": 14.4808, "num_token_query": 37.2541, "num_token_union": 65.4447, "num_word_context": 202.6478, "num_word_doc": 49.8055, "num_word_query": 27.861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3261.9132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.365, "query_norm": 1.3727, "queue_k_norm": 1.4791, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2541, "sent_len_1": 66.7831, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6425, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3704, "doc_norm": 1.4829, "encoder_q-embeddings": 1107.6349, "encoder_q-layer.0": 715.9708, "encoder_q-layer.1": 794.8956, "encoder_q-layer.10": 1181.7366, "encoder_q-layer.11": 3079.2297, "encoder_q-layer.2": 895.5256, "encoder_q-layer.3": 866.8849, "encoder_q-layer.4": 918.5335, "encoder_q-layer.5": 936.6445, "encoder_q-layer.6": 967.8555, "encoder_q-layer.7": 1054.2111, "encoder_q-layer.8": 1188.0457, "encoder_q-layer.9": 1073.7343, "epoch": 0.83, "inbatch_neg_score": 0.365, "inbatch_pos_score": 1.0264, "learning_rate": 8.555555555555556e-06, "loss": 3.3704, "norm_diff": 0.1055, "norm_loss": 0.0, "num_token_doc": 66.7143, "num_token_overlap": 14.5181, "num_token_query": 37.0331, "num_token_union": 65.1847, "num_word_context": 201.8884, "num_word_doc": 49.7619, "num_word_query": 27.711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2038.0341, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3652, "query_norm": 1.3775, "queue_k_norm": 1.4786, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0331, "sent_len_1": 66.7143, "sent_len_max_0": 127.9813, "sent_len_max_1": 189.7125, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.3771, "doc_norm": 1.4764, "encoder_q-embeddings": 1053.1288, "encoder_q-layer.0": 680.3629, "encoder_q-layer.1": 729.0381, "encoder_q-layer.10": 1365.2308, "encoder_q-layer.11": 3249.866, "encoder_q-layer.2": 821.5028, "encoder_q-layer.3": 851.6417, "encoder_q-layer.4": 918.4552, "encoder_q-layer.5": 884.8257, "encoder_q-layer.6": 1020.7938, "encoder_q-layer.7": 1131.2732, "encoder_q-layer.8": 1336.4227, "encoder_q-layer.9": 1200.7698, "epoch": 0.83, "inbatch_neg_score": 0.3668, "inbatch_pos_score": 1.0029, "learning_rate": 8.500000000000002e-06, "loss": 3.3771, "norm_diff": 0.0995, "norm_loss": 0.0, "num_token_doc": 66.6322, "num_token_overlap": 14.5288, "num_token_query": 37.3605, "num_token_union": 65.3849, "num_word_context": 202.1075, "num_word_doc": 49.7441, "num_word_query": 27.965, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2079.6882, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3665, "query_norm": 1.3769, "queue_k_norm": 1.4784, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3605, "sent_len_1": 66.6322, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.92, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.368, "doc_norm": 1.48, "encoder_q-embeddings": 1122.6223, "encoder_q-layer.0": 768.8199, "encoder_q-layer.1": 800.8922, "encoder_q-layer.10": 1346.0994, "encoder_q-layer.11": 3069.79, "encoder_q-layer.2": 929.4318, "encoder_q-layer.3": 958.2244, "encoder_q-layer.4": 998.2936, "encoder_q-layer.5": 1035.8202, "encoder_q-layer.6": 1120.5999, "encoder_q-layer.7": 1157.2098, "encoder_q-layer.8": 1236.8521, "encoder_q-layer.9": 1138.4757, "epoch": 0.83, "inbatch_neg_score": 0.3653, "inbatch_pos_score": 1.0234, "learning_rate": 8.444444444444446e-06, "loss": 3.368, "norm_diff": 0.104, "norm_loss": 0.0, "num_token_doc": 66.7241, "num_token_overlap": 14.5739, "num_token_query": 37.4211, "num_token_union": 65.4677, "num_word_context": 202.2052, "num_word_doc": 49.855, "num_word_query": 28.0053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2038.5143, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3662, "query_norm": 1.376, "queue_k_norm": 1.4788, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4211, "sent_len_1": 66.7241, "sent_len_max_0": 127.99, "sent_len_max_1": 187.1, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3676, "doc_norm": 1.4857, "encoder_q-embeddings": 1393.1018, "encoder_q-layer.0": 914.6519, "encoder_q-layer.1": 1006.4193, "encoder_q-layer.10": 1280.8326, "encoder_q-layer.11": 3045.4954, "encoder_q-layer.2": 1212.4542, "encoder_q-layer.3": 1233.4183, "encoder_q-layer.4": 1287.7407, "encoder_q-layer.5": 1288.5912, "encoder_q-layer.6": 1358.0348, "encoder_q-layer.7": 1429.6514, "encoder_q-layer.8": 1468.2195, "encoder_q-layer.9": 1119.9993, "epoch": 0.83, "inbatch_neg_score": 0.3643, "inbatch_pos_score": 1.0107, "learning_rate": 8.38888888888889e-06, "loss": 3.3676, "norm_diff": 0.1242, "norm_loss": 0.0, "num_token_doc": 66.9465, "num_token_overlap": 14.5849, "num_token_query": 37.3623, "num_token_union": 65.5449, "num_word_context": 202.5277, "num_word_doc": 49.939, "num_word_query": 27.9759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2279.3323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3652, "query_norm": 1.3615, "queue_k_norm": 1.48, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3623, "sent_len_1": 66.9465, "sent_len_max_0": 128.0, "sent_len_max_1": 190.275, "stdk": 0.049, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3531, "doc_norm": 1.4758, "encoder_q-embeddings": 4526.1367, "encoder_q-layer.0": 3272.0112, "encoder_q-layer.1": 3739.4543, "encoder_q-layer.10": 2249.2424, "encoder_q-layer.11": 6089.8862, "encoder_q-layer.2": 4804.0391, "encoder_q-layer.3": 4981.3018, "encoder_q-layer.4": 4889.8525, "encoder_q-layer.5": 4635.9399, "encoder_q-layer.6": 3907.9607, "encoder_q-layer.7": 4025.9827, "encoder_q-layer.8": 3489.1265, "encoder_q-layer.9": 2339.1611, "epoch": 0.83, "inbatch_neg_score": 0.3647, "inbatch_pos_score": 1.0098, "learning_rate": 8.333333333333334e-06, "loss": 3.3531, "norm_diff": 0.1093, "norm_loss": 0.0, "num_token_doc": 66.8731, "num_token_overlap": 14.4976, "num_token_query": 37.1604, "num_token_union": 65.418, "num_word_context": 202.6516, "num_word_doc": 49.8885, "num_word_query": 27.813, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6388.0616, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3643, "query_norm": 1.3666, "queue_k_norm": 1.4797, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1604, "sent_len_1": 66.8731, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.5462, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.3677, "doc_norm": 1.4829, "encoder_q-embeddings": 2829.3765, "encoder_q-layer.0": 1905.8457, "encoder_q-layer.1": 2103.9524, "encoder_q-layer.10": 2526.9609, "encoder_q-layer.11": 6353.9014, "encoder_q-layer.2": 2515.54, "encoder_q-layer.3": 2479.781, "encoder_q-layer.4": 2696.5894, "encoder_q-layer.5": 2717.0693, "encoder_q-layer.6": 2775.5188, "encoder_q-layer.7": 2874.1343, "encoder_q-layer.8": 3042.2563, "encoder_q-layer.9": 2487.1953, "epoch": 0.83, "inbatch_neg_score": 0.3642, "inbatch_pos_score": 0.9946, "learning_rate": 8.27777777777778e-06, "loss": 3.3677, "norm_diff": 0.1303, "norm_loss": 0.0, "num_token_doc": 66.4498, "num_token_overlap": 14.6318, "num_token_query": 37.4438, "num_token_union": 65.1913, "num_word_context": 201.9336, "num_word_doc": 49.5963, "num_word_query": 28.0277, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4707.7535, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3633, "query_norm": 1.3526, "queue_k_norm": 1.4803, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4438, "sent_len_1": 66.4498, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.4038, "stdk": 0.0489, "stdq": 0.0436, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.356, "doc_norm": 1.4864, "encoder_q-embeddings": 3250.4924, "encoder_q-layer.0": 2421.8064, "encoder_q-layer.1": 2805.489, "encoder_q-layer.10": 2212.0911, "encoder_q-layer.11": 6142.6738, "encoder_q-layer.2": 3381.7397, "encoder_q-layer.3": 3396.0471, "encoder_q-layer.4": 3403.6934, "encoder_q-layer.5": 2887.9644, "encoder_q-layer.6": 2788.3591, "encoder_q-layer.7": 2692.4163, "encoder_q-layer.8": 2601.812, "encoder_q-layer.9": 2111.1826, "epoch": 0.83, "inbatch_neg_score": 0.365, "inbatch_pos_score": 1.0146, "learning_rate": 8.222222222222223e-06, "loss": 3.356, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.6385, "num_token_overlap": 14.6137, "num_token_query": 37.3671, "num_token_union": 65.2663, "num_word_context": 201.891, "num_word_doc": 49.764, "num_word_query": 27.9763, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5010.0516, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3643, "query_norm": 1.3644, "queue_k_norm": 1.4815, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3671, "sent_len_1": 66.6385, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4325, "stdk": 0.049, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3605, "doc_norm": 1.479, "encoder_q-embeddings": 2539.0371, "encoder_q-layer.0": 1705.6301, "encoder_q-layer.1": 2012.499, "encoder_q-layer.10": 2648.9395, "encoder_q-layer.11": 6359.0552, "encoder_q-layer.2": 2286.0842, "encoder_q-layer.3": 2395.4897, "encoder_q-layer.4": 2591.4126, "encoder_q-layer.5": 2725.3499, "encoder_q-layer.6": 2907.072, "encoder_q-layer.7": 2932.4827, "encoder_q-layer.8": 2812.2002, "encoder_q-layer.9": 2315.8774, "epoch": 0.83, "inbatch_neg_score": 0.3632, "inbatch_pos_score": 1.0166, "learning_rate": 8.166666666666668e-06, "loss": 3.3605, "norm_diff": 0.1046, "norm_loss": 0.0, "num_token_doc": 66.6326, "num_token_overlap": 14.5692, "num_token_query": 37.3384, "num_token_union": 65.2928, "num_word_context": 202.4742, "num_word_doc": 49.729, "num_word_query": 27.9254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4487.7477, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3638, "query_norm": 1.3744, "queue_k_norm": 1.4796, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3384, "sent_len_1": 66.6326, "sent_len_max_0": 128.0, "sent_len_max_1": 189.935, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.3831, "doc_norm": 1.4734, "encoder_q-embeddings": 1929.4425, "encoder_q-layer.0": 1321.4019, "encoder_q-layer.1": 1369.9778, "encoder_q-layer.10": 2292.3025, "encoder_q-layer.11": 6185.2241, "encoder_q-layer.2": 1544.903, "encoder_q-layer.3": 1582.9866, "encoder_q-layer.4": 1646.6385, "encoder_q-layer.5": 1660.749, "encoder_q-layer.6": 1904.0475, "encoder_q-layer.7": 2157.9556, "encoder_q-layer.8": 2331.3176, "encoder_q-layer.9": 2189.2183, "epoch": 0.83, "inbatch_neg_score": 0.3663, "inbatch_pos_score": 0.9834, "learning_rate": 8.111111111111112e-06, "loss": 3.3831, "norm_diff": 0.1251, "norm_loss": 0.0, "num_token_doc": 66.5432, "num_token_overlap": 14.521, "num_token_query": 37.2323, "num_token_union": 65.2194, "num_word_context": 202.0508, "num_word_doc": 49.6432, "num_word_query": 27.8715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3852.2658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3655, "query_norm": 1.3483, "queue_k_norm": 1.4811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2323, "sent_len_1": 66.5432, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9162, "stdk": 0.0485, "stdq": 0.0434, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3634, "doc_norm": 1.4792, "encoder_q-embeddings": 2095.8694, "encoder_q-layer.0": 1421.7133, "encoder_q-layer.1": 1496.7308, "encoder_q-layer.10": 2252.8501, "encoder_q-layer.11": 6375.0273, "encoder_q-layer.2": 1698.9996, "encoder_q-layer.3": 1731.9565, "encoder_q-layer.4": 1804.9636, "encoder_q-layer.5": 1816.8984, "encoder_q-layer.6": 1951.1123, "encoder_q-layer.7": 2040.2273, "encoder_q-layer.8": 2453.4138, "encoder_q-layer.9": 2260.0835, "epoch": 0.83, "inbatch_neg_score": 0.3661, "inbatch_pos_score": 1.0254, "learning_rate": 8.055555555555557e-06, "loss": 3.3634, "norm_diff": 0.1199, "norm_loss": 0.0, "num_token_doc": 66.8173, "num_token_overlap": 14.6279, "num_token_query": 37.6065, "num_token_union": 65.538, "num_word_context": 202.3662, "num_word_doc": 49.8011, "num_word_query": 28.169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3990.8538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3667, "query_norm": 1.3593, "queue_k_norm": 1.4816, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.6065, "sent_len_1": 66.8173, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.4712, "stdk": 0.0488, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3438, "doc_norm": 1.4823, "encoder_q-embeddings": 2274.3049, "encoder_q-layer.0": 1490.2587, "encoder_q-layer.1": 1530.0919, "encoder_q-layer.10": 2327.8789, "encoder_q-layer.11": 6434.9346, "encoder_q-layer.2": 1679.8461, "encoder_q-layer.3": 1815.9604, "encoder_q-layer.4": 1810.5107, "encoder_q-layer.5": 1808.2167, "encoder_q-layer.6": 2014.702, "encoder_q-layer.7": 2229.5015, "encoder_q-layer.8": 2514.0828, "encoder_q-layer.9": 2273.7522, "epoch": 0.84, "inbatch_neg_score": 0.3677, "inbatch_pos_score": 1.0078, "learning_rate": 8.000000000000001e-06, "loss": 3.3438, "norm_diff": 0.1106, "norm_loss": 0.0, "num_token_doc": 66.8608, "num_token_overlap": 14.622, "num_token_query": 37.3685, "num_token_union": 65.4462, "num_word_context": 202.2949, "num_word_doc": 49.896, "num_word_query": 27.9591, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4077.5074, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3674, "query_norm": 1.3717, "queue_k_norm": 1.4814, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3685, "sent_len_1": 66.8608, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.9162, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.3325, "doc_norm": 1.4847, "encoder_q-embeddings": 7816.0996, "encoder_q-layer.0": 6102.4854, "encoder_q-layer.1": 7134.5605, "encoder_q-layer.10": 2193.677, "encoder_q-layer.11": 5791.8848, "encoder_q-layer.2": 6824.9214, "encoder_q-layer.3": 8099.3848, "encoder_q-layer.4": 9412.251, "encoder_q-layer.5": 7163.7642, "encoder_q-layer.6": 6381.2661, "encoder_q-layer.7": 5436.5352, "encoder_q-layer.8": 4135.2275, "encoder_q-layer.9": 2458.2407, "epoch": 0.84, "inbatch_neg_score": 0.3668, "inbatch_pos_score": 1.0518, "learning_rate": 7.944444444444445e-06, "loss": 3.3325, "norm_diff": 0.0915, "norm_loss": 0.0, "num_token_doc": 66.7048, "num_token_overlap": 14.5673, "num_token_query": 37.3287, "num_token_union": 65.343, "num_word_context": 202.2438, "num_word_doc": 49.7918, "num_word_query": 27.9324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9708.4414, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3674, "query_norm": 1.3932, "queue_k_norm": 1.4838, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3287, "sent_len_1": 66.7048, "sent_len_max_0": 128.0, "sent_len_max_1": 189.615, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.3485, "doc_norm": 1.4755, "encoder_q-embeddings": 2530.2932, "encoder_q-layer.0": 1769.219, "encoder_q-layer.1": 1989.3406, "encoder_q-layer.10": 2362.8938, "encoder_q-layer.11": 6341.4917, "encoder_q-layer.2": 2324.0378, "encoder_q-layer.3": 2294.8359, "encoder_q-layer.4": 2221.9363, "encoder_q-layer.5": 2364.4363, "encoder_q-layer.6": 2227.9751, "encoder_q-layer.7": 2535.2664, "encoder_q-layer.8": 2606.9011, "encoder_q-layer.9": 2264.2786, "epoch": 0.84, "inbatch_neg_score": 0.3688, "inbatch_pos_score": 0.999, "learning_rate": 7.88888888888889e-06, "loss": 3.3485, "norm_diff": 0.1166, "norm_loss": 0.0, "num_token_doc": 66.777, "num_token_overlap": 14.6072, "num_token_query": 37.4304, "num_token_union": 65.4258, "num_word_context": 202.3284, "num_word_doc": 49.8122, "num_word_query": 28.0271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4381.4154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3672, "query_norm": 1.359, "queue_k_norm": 1.4816, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4304, "sent_len_1": 66.777, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.4325, "stdk": 0.0485, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3578, "doc_norm": 1.4781, "encoder_q-embeddings": 4518.3179, "encoder_q-layer.0": 3405.4272, "encoder_q-layer.1": 4117.9429, "encoder_q-layer.10": 2623.9099, "encoder_q-layer.11": 6624.4053, "encoder_q-layer.2": 4782.6445, "encoder_q-layer.3": 4822.3589, "encoder_q-layer.4": 5090.354, "encoder_q-layer.5": 5089.4082, "encoder_q-layer.6": 5202.3838, "encoder_q-layer.7": 4637.6733, "encoder_q-layer.8": 4043.3196, "encoder_q-layer.9": 2387.6128, "epoch": 0.84, "inbatch_neg_score": 0.3652, "inbatch_pos_score": 0.9961, "learning_rate": 7.833333333333333e-06, "loss": 3.3578, "norm_diff": 0.1127, "norm_loss": 0.0, "num_token_doc": 66.7944, "num_token_overlap": 14.5746, "num_token_query": 37.3644, "num_token_union": 65.441, "num_word_context": 202.5967, "num_word_doc": 49.8355, "num_word_query": 27.9683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6884.8684, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3645, "query_norm": 1.3654, "queue_k_norm": 1.4828, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3644, "sent_len_1": 66.7944, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.955, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.356, "doc_norm": 1.4782, "encoder_q-embeddings": 2212.4426, "encoder_q-layer.0": 1498.2129, "encoder_q-layer.1": 1580.2823, "encoder_q-layer.10": 2246.0181, "encoder_q-layer.11": 6420.7856, "encoder_q-layer.2": 1788.7156, "encoder_q-layer.3": 1868.8063, "encoder_q-layer.4": 1967.8833, "encoder_q-layer.5": 1935.8259, "encoder_q-layer.6": 2121.9858, "encoder_q-layer.7": 2113.2666, "encoder_q-layer.8": 2440.574, "encoder_q-layer.9": 2225.6106, "epoch": 0.84, "inbatch_neg_score": 0.3651, "inbatch_pos_score": 1.0049, "learning_rate": 7.777777777777777e-06, "loss": 3.356, "norm_diff": 0.1118, "norm_loss": 0.0, "num_token_doc": 66.9306, "num_token_overlap": 14.6253, "num_token_query": 37.3, "num_token_union": 65.4348, "num_word_context": 202.4864, "num_word_doc": 49.9456, "num_word_query": 27.9222, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4162.5224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3645, "query_norm": 1.3664, "queue_k_norm": 1.4835, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3, "sent_len_1": 66.9306, "sent_len_max_0": 128.0, "sent_len_max_1": 189.755, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3481, "doc_norm": 1.4812, "encoder_q-embeddings": 2039.5093, "encoder_q-layer.0": 1392.7778, "encoder_q-layer.1": 1510.2612, "encoder_q-layer.10": 2487.5173, "encoder_q-layer.11": 6449.3066, "encoder_q-layer.2": 1723.6573, "encoder_q-layer.3": 1772.8723, "encoder_q-layer.4": 1893.0654, "encoder_q-layer.5": 1925.3881, "encoder_q-layer.6": 2109.1162, "encoder_q-layer.7": 2252.0156, "encoder_q-layer.8": 2561.907, "encoder_q-layer.9": 2364.1633, "epoch": 0.84, "inbatch_neg_score": 0.3649, "inbatch_pos_score": 1.0049, "learning_rate": 7.722222222222223e-06, "loss": 3.3481, "norm_diff": 0.1144, "norm_loss": 0.0, "num_token_doc": 66.6168, "num_token_overlap": 14.6184, "num_token_query": 37.3891, "num_token_union": 65.2774, "num_word_context": 202.1222, "num_word_doc": 49.7204, "num_word_query": 27.9952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4078.0294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3647, "query_norm": 1.3668, "queue_k_norm": 1.4816, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3891, "sent_len_1": 66.6168, "sent_len_max_0": 128.0, "sent_len_max_1": 189.28, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3275, "doc_norm": 1.4829, "encoder_q-embeddings": 5528.0327, "encoder_q-layer.0": 3553.9749, "encoder_q-layer.1": 4051.6101, "encoder_q-layer.10": 2423.6941, "encoder_q-layer.11": 6186.3857, "encoder_q-layer.2": 4832.6289, "encoder_q-layer.3": 4998.7251, "encoder_q-layer.4": 5176.5854, "encoder_q-layer.5": 5488.6392, "encoder_q-layer.6": 4528.5361, "encoder_q-layer.7": 3983.2454, "encoder_q-layer.8": 3600.9683, "encoder_q-layer.9": 2367.5112, "epoch": 0.84, "inbatch_neg_score": 0.3672, "inbatch_pos_score": 1.0156, "learning_rate": 7.666666666666667e-06, "loss": 3.3275, "norm_diff": 0.113, "norm_loss": 0.0, "num_token_doc": 66.88, "num_token_overlap": 14.7097, "num_token_query": 37.6267, "num_token_union": 65.457, "num_word_context": 202.265, "num_word_doc": 49.8224, "num_word_query": 28.1637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6912.2238, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3662, "query_norm": 1.3699, "queue_k_norm": 1.4822, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.6267, "sent_len_1": 66.88, "sent_len_max_0": 127.9587, "sent_len_max_1": 191.3038, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3706, "doc_norm": 1.4812, "encoder_q-embeddings": 3450.8691, "encoder_q-layer.0": 2350.4824, "encoder_q-layer.1": 2559.822, "encoder_q-layer.10": 2257.6421, "encoder_q-layer.11": 6001.0483, "encoder_q-layer.2": 2665.7703, "encoder_q-layer.3": 2674.2148, "encoder_q-layer.4": 2716.9434, "encoder_q-layer.5": 2778.0256, "encoder_q-layer.6": 2837.6711, "encoder_q-layer.7": 2744.9412, "encoder_q-layer.8": 2904.3494, "encoder_q-layer.9": 2249.4324, "epoch": 0.84, "inbatch_neg_score": 0.3667, "inbatch_pos_score": 1.0264, "learning_rate": 7.611111111111112e-06, "loss": 3.3706, "norm_diff": 0.1115, "norm_loss": 0.0, "num_token_doc": 66.8285, "num_token_overlap": 14.5435, "num_token_query": 37.2857, "num_token_union": 65.4172, "num_word_context": 202.3206, "num_word_doc": 49.8962, "num_word_query": 27.9143, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4818.3658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3672, "query_norm": 1.3697, "queue_k_norm": 1.4824, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2857, "sent_len_1": 66.8285, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0275, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3674, "doc_norm": 1.4856, "encoder_q-embeddings": 2788.4392, "encoder_q-layer.0": 1930.826, "encoder_q-layer.1": 2064.3391, "encoder_q-layer.10": 2342.2358, "encoder_q-layer.11": 5850.106, "encoder_q-layer.2": 2299.564, "encoder_q-layer.3": 2377.8508, "encoder_q-layer.4": 2532.387, "encoder_q-layer.5": 2574.4473, "encoder_q-layer.6": 2689.4058, "encoder_q-layer.7": 3077.1614, "encoder_q-layer.8": 3293.5017, "encoder_q-layer.9": 2653.1902, "epoch": 0.84, "inbatch_neg_score": 0.3671, "inbatch_pos_score": 1.0186, "learning_rate": 7.555555555555556e-06, "loss": 3.3674, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.5785, "num_token_overlap": 14.5234, "num_token_query": 37.2033, "num_token_union": 65.2594, "num_word_context": 201.9112, "num_word_doc": 49.6994, "num_word_query": 27.879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4571.9715, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3674, "query_norm": 1.3759, "queue_k_norm": 1.4811, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2033, "sent_len_1": 66.5785, "sent_len_max_0": 127.995, "sent_len_max_1": 188.8, "stdk": 0.049, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3726, "doc_norm": 1.4822, "encoder_q-embeddings": 6637.2012, "encoder_q-layer.0": 5041.311, "encoder_q-layer.1": 5557.4604, "encoder_q-layer.10": 2303.0056, "encoder_q-layer.11": 6088.0195, "encoder_q-layer.2": 6511.5498, "encoder_q-layer.3": 6702.9482, "encoder_q-layer.4": 7477.6924, "encoder_q-layer.5": 7548.1772, "encoder_q-layer.6": 7656.9487, "encoder_q-layer.7": 7228.9204, "encoder_q-layer.8": 5041.6611, "encoder_q-layer.9": 2515.4219, "epoch": 0.84, "inbatch_neg_score": 0.3672, "inbatch_pos_score": 1.0234, "learning_rate": 7.5e-06, "loss": 3.3726, "norm_diff": 0.108, "norm_loss": 0.0, "num_token_doc": 66.6721, "num_token_overlap": 14.5652, "num_token_query": 37.4253, "num_token_union": 65.3628, "num_word_context": 202.3857, "num_word_doc": 49.7169, "num_word_query": 28.0314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9569.8893, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3672, "query_norm": 1.3742, "queue_k_norm": 1.482, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4253, "sent_len_1": 66.6721, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0175, "stdk": 0.0488, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3544, "doc_norm": 1.4846, "encoder_q-embeddings": 1879.0679, "encoder_q-layer.0": 1242.5535, "encoder_q-layer.1": 1262.9365, "encoder_q-layer.10": 2410.7729, "encoder_q-layer.11": 6441.082, "encoder_q-layer.2": 1417.6655, "encoder_q-layer.3": 1496.0448, "encoder_q-layer.4": 1566.5909, "encoder_q-layer.5": 1636.5864, "encoder_q-layer.6": 1816.7135, "encoder_q-layer.7": 2056.3208, "encoder_q-layer.8": 2513.6387, "encoder_q-layer.9": 2376.2952, "epoch": 0.85, "inbatch_neg_score": 0.3683, "inbatch_pos_score": 1.0303, "learning_rate": 7.444444444444444e-06, "loss": 3.3544, "norm_diff": 0.1059, "norm_loss": 0.0, "num_token_doc": 66.6961, "num_token_overlap": 14.6096, "num_token_query": 37.5576, "num_token_union": 65.4692, "num_word_context": 202.2068, "num_word_doc": 49.7803, "num_word_query": 28.1298, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3961.5773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3696, "query_norm": 1.3787, "queue_k_norm": 1.4828, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5576, "sent_len_1": 66.6961, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8663, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3474, "doc_norm": 1.4828, "encoder_q-embeddings": 2189.0618, "encoder_q-layer.0": 1485.9082, "encoder_q-layer.1": 1559.6008, "encoder_q-layer.10": 2455.7349, "encoder_q-layer.11": 6283.9834, "encoder_q-layer.2": 1816.0616, "encoder_q-layer.3": 1796.9136, "encoder_q-layer.4": 1818.4762, "encoder_q-layer.5": 1941.9825, "encoder_q-layer.6": 2164.1616, "encoder_q-layer.7": 2297.2385, "encoder_q-layer.8": 2561.7029, "encoder_q-layer.9": 2262.5469, "epoch": 0.85, "inbatch_neg_score": 0.3712, "inbatch_pos_score": 1.0293, "learning_rate": 7.38888888888889e-06, "loss": 3.3474, "norm_diff": 0.1008, "norm_loss": 0.0, "num_token_doc": 66.9528, "num_token_overlap": 14.7033, "num_token_query": 37.4788, "num_token_union": 65.4778, "num_word_context": 202.3064, "num_word_doc": 49.9723, "num_word_query": 28.0458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4128.2767, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3699, "query_norm": 1.382, "queue_k_norm": 1.4832, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4788, "sent_len_1": 66.9528, "sent_len_max_0": 128.0, "sent_len_max_1": 191.6675, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3328, "doc_norm": 1.4846, "encoder_q-embeddings": 1984.8448, "encoder_q-layer.0": 1291.8083, "encoder_q-layer.1": 1300.8829, "encoder_q-layer.10": 2359.0903, "encoder_q-layer.11": 6029.0645, "encoder_q-layer.2": 1437.1144, "encoder_q-layer.3": 1469.139, "encoder_q-layer.4": 1562.433, "encoder_q-layer.5": 1589.925, "encoder_q-layer.6": 1819.4768, "encoder_q-layer.7": 2029.2892, "encoder_q-layer.8": 2424.7949, "encoder_q-layer.9": 2191.1807, "epoch": 0.85, "inbatch_neg_score": 0.3672, "inbatch_pos_score": 1.0391, "learning_rate": 7.333333333333334e-06, "loss": 3.3328, "norm_diff": 0.1213, "norm_loss": 0.0, "num_token_doc": 66.8966, "num_token_overlap": 14.7387, "num_token_query": 37.6157, "num_token_union": 65.491, "num_word_context": 202.4689, "num_word_doc": 49.8736, "num_word_query": 28.1652, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3836.0406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3677, "query_norm": 1.3633, "queue_k_norm": 1.4823, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.6157, "sent_len_1": 66.8966, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.5337, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.337, "doc_norm": 1.4772, "encoder_q-embeddings": 2237.3293, "encoder_q-layer.0": 1541.8228, "encoder_q-layer.1": 1642.7734, "encoder_q-layer.10": 2474.302, "encoder_q-layer.11": 6683.2363, "encoder_q-layer.2": 1805.3594, "encoder_q-layer.3": 1911.7058, "encoder_q-layer.4": 1966.5338, "encoder_q-layer.5": 2046.0416, "encoder_q-layer.6": 2237.5708, "encoder_q-layer.7": 2359.2581, "encoder_q-layer.8": 2821.5906, "encoder_q-layer.9": 2522.9592, "epoch": 0.85, "inbatch_neg_score": 0.3687, "inbatch_pos_score": 0.9971, "learning_rate": 7.277777777777778e-06, "loss": 3.337, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.5572, "num_token_overlap": 14.6409, "num_token_query": 37.3874, "num_token_union": 65.2933, "num_word_context": 201.8359, "num_word_doc": 49.6395, "num_word_query": 27.9968, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4418.7546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3699, "query_norm": 1.3622, "queue_k_norm": 1.4814, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3874, "sent_len_1": 66.5572, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5987, "stdk": 0.0486, "stdq": 0.0439, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.336, "doc_norm": 1.4861, "encoder_q-embeddings": 5865.0864, "encoder_q-layer.0": 4172.8975, "encoder_q-layer.1": 4769.5254, "encoder_q-layer.10": 4785.2739, "encoder_q-layer.11": 12831.5293, "encoder_q-layer.2": 5759.9985, "encoder_q-layer.3": 6514.8369, "encoder_q-layer.4": 6259.5244, "encoder_q-layer.5": 5570.4453, "encoder_q-layer.6": 7038.3745, "encoder_q-layer.7": 7407.7515, "encoder_q-layer.8": 7084.1914, "encoder_q-layer.9": 4767.1836, "epoch": 0.85, "inbatch_neg_score": 0.3716, "inbatch_pos_score": 1.0127, "learning_rate": 7.222222222222222e-06, "loss": 3.336, "norm_diff": 0.1179, "norm_loss": 0.0, "num_token_doc": 66.8117, "num_token_overlap": 14.6308, "num_token_query": 37.3817, "num_token_union": 65.3854, "num_word_context": 202.3463, "num_word_doc": 49.8475, "num_word_query": 28.0103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10370.1436, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3711, "query_norm": 1.3683, "queue_k_norm": 1.4818, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3817, "sent_len_1": 66.8117, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0662, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3575, "doc_norm": 1.492, "encoder_q-embeddings": 4136.3735, "encoder_q-layer.0": 2756.989, "encoder_q-layer.1": 2842.042, "encoder_q-layer.10": 4918.4326, "encoder_q-layer.11": 12474.7705, "encoder_q-layer.2": 3164.3362, "encoder_q-layer.3": 3376.0044, "encoder_q-layer.4": 3512.2202, "encoder_q-layer.5": 3594.6116, "encoder_q-layer.6": 4018.5413, "encoder_q-layer.7": 4337.7544, "encoder_q-layer.8": 5193.2021, "encoder_q-layer.9": 4730.9487, "epoch": 0.85, "inbatch_neg_score": 0.3666, "inbatch_pos_score": 1.0293, "learning_rate": 7.166666666666667e-06, "loss": 3.3575, "norm_diff": 0.1277, "norm_loss": 0.0, "num_token_doc": 66.7659, "num_token_overlap": 14.599, "num_token_query": 37.2818, "num_token_union": 65.3657, "num_word_context": 202.1576, "num_word_doc": 49.8298, "num_word_query": 27.9208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7998.1307, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3672, "query_norm": 1.3643, "queue_k_norm": 1.4836, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2818, "sent_len_1": 66.7659, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.3787, "stdk": 0.0492, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.3611, "doc_norm": 1.485, "encoder_q-embeddings": 3762.1294, "encoder_q-layer.0": 2751.646, "encoder_q-layer.1": 3194.5552, "encoder_q-layer.10": 2410.1665, "encoder_q-layer.11": 5873.2529, "encoder_q-layer.2": 3968.6338, "encoder_q-layer.3": 4554.9927, "encoder_q-layer.4": 5044.1582, "encoder_q-layer.5": 4527.4038, "encoder_q-layer.6": 4230.6758, "encoder_q-layer.7": 3396.4685, "encoder_q-layer.8": 2789.0564, "encoder_q-layer.9": 2332.8689, "epoch": 0.85, "inbatch_neg_score": 0.3654, "inbatch_pos_score": 1.0371, "learning_rate": 7.111111111111112e-06, "loss": 3.3611, "norm_diff": 0.108, "norm_loss": 0.0, "num_token_doc": 66.6214, "num_token_overlap": 14.5411, "num_token_query": 37.2874, "num_token_union": 65.2882, "num_word_context": 201.8994, "num_word_doc": 49.7126, "num_word_query": 27.9178, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5862.4721, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3667, "query_norm": 1.377, "queue_k_norm": 1.4837, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2874, "sent_len_1": 66.6214, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5925, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.3558, "doc_norm": 1.4848, "encoder_q-embeddings": 2071.6375, "encoder_q-layer.0": 1376.3888, "encoder_q-layer.1": 1444.6285, "encoder_q-layer.10": 2406.9573, "encoder_q-layer.11": 6040.7363, "encoder_q-layer.2": 1602.0811, "encoder_q-layer.3": 1645.5796, "encoder_q-layer.4": 1714.3066, "encoder_q-layer.5": 1707.3853, "encoder_q-layer.6": 1825.1227, "encoder_q-layer.7": 2008.9985, "encoder_q-layer.8": 2302.032, "encoder_q-layer.9": 2146.9548, "epoch": 0.85, "inbatch_neg_score": 0.3668, "inbatch_pos_score": 1.0205, "learning_rate": 7.055555555555556e-06, "loss": 3.3558, "norm_diff": 0.11, "norm_loss": 0.0, "num_token_doc": 66.7425, "num_token_overlap": 14.5349, "num_token_query": 37.2475, "num_token_union": 65.3338, "num_word_context": 202.1224, "num_word_doc": 49.8341, "num_word_query": 27.865, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3841.693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3672, "query_norm": 1.3748, "queue_k_norm": 1.4836, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2475, "sent_len_1": 66.7425, "sent_len_max_0": 127.99, "sent_len_max_1": 188.7025, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3676, "doc_norm": 1.4867, "encoder_q-embeddings": 3596.9285, "encoder_q-layer.0": 2544.0056, "encoder_q-layer.1": 2802.9993, "encoder_q-layer.10": 2703.7678, "encoder_q-layer.11": 6379.4717, "encoder_q-layer.2": 3358.3831, "encoder_q-layer.3": 3609.9041, "encoder_q-layer.4": 4232.7524, "encoder_q-layer.5": 3558.396, "encoder_q-layer.6": 3215.7578, "encoder_q-layer.7": 3340.3542, "encoder_q-layer.8": 3284.8484, "encoder_q-layer.9": 2403.229, "epoch": 0.85, "inbatch_neg_score": 0.3693, "inbatch_pos_score": 1.0352, "learning_rate": 7.000000000000001e-06, "loss": 3.3676, "norm_diff": 0.1053, "norm_loss": 0.0, "num_token_doc": 66.7272, "num_token_overlap": 14.5153, "num_token_query": 37.1313, "num_token_union": 65.3123, "num_word_context": 202.2835, "num_word_doc": 49.8389, "num_word_query": 27.8217, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5589.8432, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3687, "query_norm": 1.3814, "queue_k_norm": 1.4844, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1313, "sent_len_1": 66.7272, "sent_len_max_0": 128.0, "sent_len_max_1": 189.96, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.3515, "doc_norm": 1.4812, "encoder_q-embeddings": 2286.8303, "encoder_q-layer.0": 1534.5459, "encoder_q-layer.1": 1572.1105, "encoder_q-layer.10": 2665.8618, "encoder_q-layer.11": 6472.1909, "encoder_q-layer.2": 1713.397, "encoder_q-layer.3": 1762.0566, "encoder_q-layer.4": 1907.3812, "encoder_q-layer.5": 1979.198, "encoder_q-layer.6": 2012.4889, "encoder_q-layer.7": 2226.8955, "encoder_q-layer.8": 2585.8418, "encoder_q-layer.9": 2381.3958, "epoch": 0.85, "inbatch_neg_score": 0.3715, "inbatch_pos_score": 0.9956, "learning_rate": 6.944444444444445e-06, "loss": 3.3515, "norm_diff": 0.1095, "norm_loss": 0.0, "num_token_doc": 66.7155, "num_token_overlap": 14.6304, "num_token_query": 37.3529, "num_token_union": 65.3024, "num_word_context": 202.117, "num_word_doc": 49.7423, "num_word_query": 27.9665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4199.2689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3704, "query_norm": 1.3717, "queue_k_norm": 1.4841, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3529, "sent_len_1": 66.7155, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.1712, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3505, "doc_norm": 1.4766, "encoder_q-embeddings": 2303.8423, "encoder_q-layer.0": 1620.533, "encoder_q-layer.1": 1767.1724, "encoder_q-layer.10": 2478.2664, "encoder_q-layer.11": 6364.8052, "encoder_q-layer.2": 2087.1167, "encoder_q-layer.3": 2274.4561, "encoder_q-layer.4": 2317.2769, "encoder_q-layer.5": 2282.6909, "encoder_q-layer.6": 2360.3027, "encoder_q-layer.7": 2473.5452, "encoder_q-layer.8": 2763.0449, "encoder_q-layer.9": 2549.6689, "epoch": 0.86, "inbatch_neg_score": 0.3721, "inbatch_pos_score": 1.0312, "learning_rate": 6.888888888888889e-06, "loss": 3.3505, "norm_diff": 0.1099, "norm_loss": 0.0, "num_token_doc": 66.6022, "num_token_overlap": 14.6204, "num_token_query": 37.5026, "num_token_union": 65.4017, "num_word_context": 202.1298, "num_word_doc": 49.713, "num_word_query": 28.0828, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4351.8423, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3718, "query_norm": 1.3667, "queue_k_norm": 1.4821, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5026, "sent_len_1": 66.6022, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.545, "stdk": 0.0485, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3571, "doc_norm": 1.4831, "encoder_q-embeddings": 2569.0105, "encoder_q-layer.0": 1690.3431, "encoder_q-layer.1": 1902.0834, "encoder_q-layer.10": 2290.4006, "encoder_q-layer.11": 5974.5386, "encoder_q-layer.2": 2369.0645, "encoder_q-layer.3": 2384.4382, "encoder_q-layer.4": 2703.519, "encoder_q-layer.5": 2658.2048, "encoder_q-layer.6": 2666.4216, "encoder_q-layer.7": 2390.9915, "encoder_q-layer.8": 2578.5569, "encoder_q-layer.9": 2335.2837, "epoch": 0.86, "inbatch_neg_score": 0.3695, "inbatch_pos_score": 1.0088, "learning_rate": 6.833333333333333e-06, "loss": 3.3571, "norm_diff": 0.1184, "norm_loss": 0.0, "num_token_doc": 66.844, "num_token_overlap": 14.6014, "num_token_query": 37.3655, "num_token_union": 65.4601, "num_word_context": 202.355, "num_word_doc": 49.8908, "num_word_query": 27.9883, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4386.9263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3689, "query_norm": 1.3648, "queue_k_norm": 1.4843, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3655, "sent_len_1": 66.844, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4762, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3614, "doc_norm": 1.4849, "encoder_q-embeddings": 1962.4659, "encoder_q-layer.0": 1375.9874, "encoder_q-layer.1": 1423.3668, "encoder_q-layer.10": 2383.167, "encoder_q-layer.11": 6039.9409, "encoder_q-layer.2": 1599.4385, "encoder_q-layer.3": 1654.4735, "encoder_q-layer.4": 1760.5034, "encoder_q-layer.5": 1858.0054, "encoder_q-layer.6": 1986.4652, "encoder_q-layer.7": 2056.0994, "encoder_q-layer.8": 2544.3706, "encoder_q-layer.9": 2247.9102, "epoch": 0.86, "inbatch_neg_score": 0.3706, "inbatch_pos_score": 1.0195, "learning_rate": 6.777777777777779e-06, "loss": 3.3614, "norm_diff": 0.1167, "norm_loss": 0.0, "num_token_doc": 66.861, "num_token_overlap": 14.6076, "num_token_query": 37.3537, "num_token_union": 65.4514, "num_word_context": 202.2597, "num_word_doc": 49.922, "num_word_query": 27.979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3863.1625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3706, "query_norm": 1.3682, "queue_k_norm": 1.4836, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3537, "sent_len_1": 66.861, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3638, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.3435, "doc_norm": 1.4842, "encoder_q-embeddings": 23008.2168, "encoder_q-layer.0": 16556.3809, "encoder_q-layer.1": 15988.2773, "encoder_q-layer.10": 2291.7793, "encoder_q-layer.11": 6080.1875, "encoder_q-layer.2": 19303.8281, "encoder_q-layer.3": 21257.6211, "encoder_q-layer.4": 22344.6523, "encoder_q-layer.5": 21999.4199, "encoder_q-layer.6": 16525.5371, "encoder_q-layer.7": 7794.7944, "encoder_q-layer.8": 4426.3584, "encoder_q-layer.9": 2513.6335, "epoch": 0.86, "inbatch_neg_score": 0.3707, "inbatch_pos_score": 1.0107, "learning_rate": 6.722222222222223e-06, "loss": 3.3435, "norm_diff": 0.1151, "norm_loss": 0.0, "num_token_doc": 66.5794, "num_token_overlap": 14.5734, "num_token_query": 37.3267, "num_token_union": 65.2647, "num_word_context": 201.7114, "num_word_doc": 49.6408, "num_word_query": 27.9179, "postclip_grad_norm": 1.0, "preclip_grad_norm": 24390.2515, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3706, "query_norm": 1.3691, "queue_k_norm": 1.4837, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3267, "sent_len_1": 66.5794, "sent_len_max_0": 128.0, "sent_len_max_1": 190.225, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3221, "doc_norm": 1.4847, "encoder_q-embeddings": 2282.5073, "encoder_q-layer.0": 1530.1388, "encoder_q-layer.1": 1619.0758, "encoder_q-layer.10": 2321.6826, "encoder_q-layer.11": 6143.3735, "encoder_q-layer.2": 1805.9089, "encoder_q-layer.3": 1874.9264, "encoder_q-layer.4": 1997.0248, "encoder_q-layer.5": 2051.2278, "encoder_q-layer.6": 2234.9553, "encoder_q-layer.7": 2522.6711, "encoder_q-layer.8": 2704.6528, "encoder_q-layer.9": 2283.3369, "epoch": 0.86, "inbatch_neg_score": 0.3704, "inbatch_pos_score": 1.0352, "learning_rate": 6.666666666666667e-06, "loss": 3.3221, "norm_diff": 0.1114, "norm_loss": 0.0, "num_token_doc": 66.5479, "num_token_overlap": 14.6443, "num_token_query": 37.5046, "num_token_union": 65.3289, "num_word_context": 202.0541, "num_word_doc": 49.655, "num_word_query": 28.0841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4185.2931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3706, "query_norm": 1.3733, "queue_k_norm": 1.484, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5046, "sent_len_1": 66.5479, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6887, "stdk": 0.0488, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.3243, "doc_norm": 1.4834, "encoder_q-embeddings": 2965.6562, "encoder_q-layer.0": 2012.616, "encoder_q-layer.1": 2073.9521, "encoder_q-layer.10": 2307.3123, "encoder_q-layer.11": 6114.9292, "encoder_q-layer.2": 2526.3215, "encoder_q-layer.3": 2645.6108, "encoder_q-layer.4": 2725.0142, "encoder_q-layer.5": 2747.3064, "encoder_q-layer.6": 2829.157, "encoder_q-layer.7": 2976.8152, "encoder_q-layer.8": 2963.8386, "encoder_q-layer.9": 2236.2822, "epoch": 0.86, "inbatch_neg_score": 0.3688, "inbatch_pos_score": 1.0449, "learning_rate": 6.611111111111111e-06, "loss": 3.3243, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 67.0771, "num_token_overlap": 14.6746, "num_token_query": 37.4241, "num_token_union": 65.534, "num_word_context": 202.6061, "num_word_doc": 50.0261, "num_word_query": 28.0282, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4722.9459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3694, "query_norm": 1.3882, "queue_k_norm": 1.4852, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4241, "sent_len_1": 67.0771, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.8075, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3385, "doc_norm": 1.4879, "encoder_q-embeddings": 2049.5022, "encoder_q-layer.0": 1331.6713, "encoder_q-layer.1": 1408.578, "encoder_q-layer.10": 2432.8826, "encoder_q-layer.11": 6551.6245, "encoder_q-layer.2": 1563.1638, "encoder_q-layer.3": 1608.0345, "encoder_q-layer.4": 1781.1876, "encoder_q-layer.5": 1794.9624, "encoder_q-layer.6": 1994.6351, "encoder_q-layer.7": 2300.0347, "encoder_q-layer.8": 2809.1155, "encoder_q-layer.9": 2447.4397, "epoch": 0.86, "inbatch_neg_score": 0.372, "inbatch_pos_score": 1.0195, "learning_rate": 6.555555555555556e-06, "loss": 3.3385, "norm_diff": 0.1132, "norm_loss": 0.0, "num_token_doc": 66.8578, "num_token_overlap": 14.6044, "num_token_query": 37.2764, "num_token_union": 65.3907, "num_word_context": 202.0761, "num_word_doc": 49.881, "num_word_query": 27.9173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4170.5399, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3718, "query_norm": 1.3746, "queue_k_norm": 1.4834, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2764, "sent_len_1": 66.8578, "sent_len_max_0": 127.995, "sent_len_max_1": 188.855, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.3412, "doc_norm": 1.4832, "encoder_q-embeddings": 2418.4507, "encoder_q-layer.0": 1603.7905, "encoder_q-layer.1": 1752.397, "encoder_q-layer.10": 2644.7683, "encoder_q-layer.11": 6220.3838, "encoder_q-layer.2": 2057.9541, "encoder_q-layer.3": 2101.6401, "encoder_q-layer.4": 2306.3452, "encoder_q-layer.5": 2295.2485, "encoder_q-layer.6": 2347.9636, "encoder_q-layer.7": 2267.2771, "encoder_q-layer.8": 2496.4365, "encoder_q-layer.9": 2235.7507, "epoch": 0.86, "inbatch_neg_score": 0.3725, "inbatch_pos_score": 1.04, "learning_rate": 6.5000000000000004e-06, "loss": 3.3412, "norm_diff": 0.1044, "norm_loss": 0.0, "num_token_doc": 67.0292, "num_token_overlap": 14.622, "num_token_query": 37.327, "num_token_union": 65.5055, "num_word_context": 202.5361, "num_word_doc": 49.9921, "num_word_query": 27.9661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4323.8184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.373, "query_norm": 1.3787, "queue_k_norm": 1.4839, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.327, "sent_len_1": 67.0292, "sent_len_max_0": 127.975, "sent_len_max_1": 190.2125, "stdk": 0.0488, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3458, "doc_norm": 1.4821, "encoder_q-embeddings": 3197.5835, "encoder_q-layer.0": 2131.9395, "encoder_q-layer.1": 2378.9395, "encoder_q-layer.10": 2228.6443, "encoder_q-layer.11": 6138.5654, "encoder_q-layer.2": 2713.8635, "encoder_q-layer.3": 2703.1687, "encoder_q-layer.4": 2622.0408, "encoder_q-layer.5": 2552.4651, "encoder_q-layer.6": 2427.2871, "encoder_q-layer.7": 2339.4675, "encoder_q-layer.8": 2465.6562, "encoder_q-layer.9": 2239.4834, "epoch": 0.86, "inbatch_neg_score": 0.3754, "inbatch_pos_score": 1.0439, "learning_rate": 6.4444444444444445e-06, "loss": 3.3458, "norm_diff": 0.1095, "norm_loss": 0.0, "num_token_doc": 66.7238, "num_token_overlap": 14.5668, "num_token_query": 37.4879, "num_token_union": 65.5059, "num_word_context": 202.1799, "num_word_doc": 49.7701, "num_word_query": 28.0679, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4632.3303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3745, "query_norm": 1.3726, "queue_k_norm": 1.4855, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4879, "sent_len_1": 66.7238, "sent_len_max_0": 128.0, "sent_len_max_1": 190.215, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.3565, "doc_norm": 1.4838, "encoder_q-embeddings": 2322.9407, "encoder_q-layer.0": 1583.8226, "encoder_q-layer.1": 1667.8734, "encoder_q-layer.10": 2405.0684, "encoder_q-layer.11": 6627.3389, "encoder_q-layer.2": 1897.6455, "encoder_q-layer.3": 1955.3372, "encoder_q-layer.4": 2032.4995, "encoder_q-layer.5": 2006.2268, "encoder_q-layer.6": 2121.0425, "encoder_q-layer.7": 2326.0923, "encoder_q-layer.8": 2706.5178, "encoder_q-layer.9": 2445.4907, "epoch": 0.86, "inbatch_neg_score": 0.3733, "inbatch_pos_score": 0.9902, "learning_rate": 6.3888888888888885e-06, "loss": 3.3565, "norm_diff": 0.1257, "norm_loss": 0.0, "num_token_doc": 66.8146, "num_token_overlap": 14.5167, "num_token_query": 37.1504, "num_token_union": 65.3215, "num_word_context": 202.2814, "num_word_doc": 49.8483, "num_word_query": 27.8167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4312.982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.374, "query_norm": 1.3581, "queue_k_norm": 1.4846, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1504, "sent_len_1": 66.8146, "sent_len_max_0": 128.0, "sent_len_max_1": 189.765, "stdk": 0.0488, "stdq": 0.0437, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3504, "doc_norm": 1.4789, "encoder_q-embeddings": 2168.3687, "encoder_q-layer.0": 1498.1346, "encoder_q-layer.1": 1644.2415, "encoder_q-layer.10": 2309.6226, "encoder_q-layer.11": 6207.2646, "encoder_q-layer.2": 1859.3849, "encoder_q-layer.3": 1881.2031, "encoder_q-layer.4": 1965.7073, "encoder_q-layer.5": 1909.2246, "encoder_q-layer.6": 2026.4689, "encoder_q-layer.7": 2133.303, "encoder_q-layer.8": 2527.3792, "encoder_q-layer.9": 2329.6741, "epoch": 0.87, "inbatch_neg_score": 0.3746, "inbatch_pos_score": 1.0137, "learning_rate": 6.333333333333334e-06, "loss": 3.3504, "norm_diff": 0.1131, "norm_loss": 0.0, "num_token_doc": 67.0108, "num_token_overlap": 14.5787, "num_token_query": 37.1695, "num_token_union": 65.3896, "num_word_context": 202.2015, "num_word_doc": 49.9652, "num_word_query": 27.8301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4062.9191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3743, "query_norm": 1.3657, "queue_k_norm": 1.4846, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1695, "sent_len_1": 67.0108, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5813, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3494, "doc_norm": 1.4806, "encoder_q-embeddings": 2335.6648, "encoder_q-layer.0": 1565.5627, "encoder_q-layer.1": 1650.3398, "encoder_q-layer.10": 2399.4817, "encoder_q-layer.11": 6300.709, "encoder_q-layer.2": 1861.4918, "encoder_q-layer.3": 1824.6934, "encoder_q-layer.4": 1848.9409, "encoder_q-layer.5": 1986.7524, "encoder_q-layer.6": 2138.9626, "encoder_q-layer.7": 2318.6829, "encoder_q-layer.8": 2612.7161, "encoder_q-layer.9": 2337.6995, "epoch": 0.87, "inbatch_neg_score": 0.3767, "inbatch_pos_score": 1.0195, "learning_rate": 6.277777777777778e-06, "loss": 3.3494, "norm_diff": 0.1193, "norm_loss": 0.0, "num_token_doc": 66.7886, "num_token_overlap": 14.6504, "num_token_query": 37.4149, "num_token_union": 65.3725, "num_word_context": 202.4772, "num_word_doc": 49.8415, "num_word_query": 28.0198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4178.9349, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3765, "query_norm": 1.3613, "queue_k_norm": 1.4855, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4149, "sent_len_1": 66.7886, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.9162, "stdk": 0.0487, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3343, "doc_norm": 1.4801, "encoder_q-embeddings": 2774.8899, "encoder_q-layer.0": 1857.0206, "encoder_q-layer.1": 2040.0074, "encoder_q-layer.10": 2352.1143, "encoder_q-layer.11": 6212.3154, "encoder_q-layer.2": 2452.8796, "encoder_q-layer.3": 2516.4622, "encoder_q-layer.4": 2728.8938, "encoder_q-layer.5": 2624.0078, "encoder_q-layer.6": 2727.2148, "encoder_q-layer.7": 2631.0562, "encoder_q-layer.8": 2504.374, "encoder_q-layer.9": 2246.0662, "epoch": 0.87, "inbatch_neg_score": 0.3774, "inbatch_pos_score": 1.0303, "learning_rate": 6.222222222222222e-06, "loss": 3.3343, "norm_diff": 0.1037, "norm_loss": 0.0, "num_token_doc": 66.738, "num_token_overlap": 14.6224, "num_token_query": 37.2901, "num_token_union": 65.2827, "num_word_context": 202.0323, "num_word_doc": 49.8049, "num_word_query": 27.9383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4534.6078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3779, "query_norm": 1.3765, "queue_k_norm": 1.4862, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2901, "sent_len_1": 66.738, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.5375, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3357, "doc_norm": 1.4929, "encoder_q-embeddings": 2641.9521, "encoder_q-layer.0": 1804.3613, "encoder_q-layer.1": 1906.5585, "encoder_q-layer.10": 2433.407, "encoder_q-layer.11": 6308.603, "encoder_q-layer.2": 2223.843, "encoder_q-layer.3": 2356.1223, "encoder_q-layer.4": 2379.741, "encoder_q-layer.5": 2411.051, "encoder_q-layer.6": 2617.293, "encoder_q-layer.7": 2715.0676, "encoder_q-layer.8": 2903.6628, "encoder_q-layer.9": 2271.5393, "epoch": 0.87, "inbatch_neg_score": 0.3811, "inbatch_pos_score": 1.041, "learning_rate": 6.166666666666667e-06, "loss": 3.3357, "norm_diff": 0.1101, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 14.5738, "num_token_query": 37.3104, "num_token_union": 65.3723, "num_word_context": 201.9161, "num_word_doc": 49.8502, "num_word_query": 27.934, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4548.1072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3801, "query_norm": 1.3829, "queue_k_norm": 1.4855, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3104, "sent_len_1": 66.8275, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.9387, "stdk": 0.0491, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3478, "doc_norm": 1.4799, "encoder_q-embeddings": 3062.7146, "encoder_q-layer.0": 2254.5034, "encoder_q-layer.1": 2387.6016, "encoder_q-layer.10": 2573.8813, "encoder_q-layer.11": 6502.918, "encoder_q-layer.2": 2817.8203, "encoder_q-layer.3": 2956.6956, "encoder_q-layer.4": 2931.3533, "encoder_q-layer.5": 2922.1772, "encoder_q-layer.6": 3312.6506, "encoder_q-layer.7": 3101.4373, "encoder_q-layer.8": 2890.1333, "encoder_q-layer.9": 2460.105, "epoch": 0.87, "inbatch_neg_score": 0.38, "inbatch_pos_score": 1.0508, "learning_rate": 6.111111111111111e-06, "loss": 3.3478, "norm_diff": 0.0812, "norm_loss": 0.0, "num_token_doc": 66.8273, "num_token_overlap": 14.6046, "num_token_query": 37.3741, "num_token_union": 65.422, "num_word_context": 202.35, "num_word_doc": 49.8425, "num_word_query": 27.9765, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4950.2218, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3801, "query_norm": 1.3987, "queue_k_norm": 1.4873, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3741, "sent_len_1": 66.8273, "sent_len_max_0": 128.0, "sent_len_max_1": 189.945, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.314, "doc_norm": 1.4939, "encoder_q-embeddings": 3490.4575, "encoder_q-layer.0": 2401.0393, "encoder_q-layer.1": 2757.9326, "encoder_q-layer.10": 2324.7671, "encoder_q-layer.11": 6481.0127, "encoder_q-layer.2": 2992.4004, "encoder_q-layer.3": 3061.8547, "encoder_q-layer.4": 3314.6128, "encoder_q-layer.5": 3592.8833, "encoder_q-layer.6": 3613.3845, "encoder_q-layer.7": 3117.252, "encoder_q-layer.8": 2880.896, "encoder_q-layer.9": 2333.894, "epoch": 0.87, "inbatch_neg_score": 0.3821, "inbatch_pos_score": 1.0439, "learning_rate": 6.055555555555556e-06, "loss": 3.314, "norm_diff": 0.1211, "norm_loss": 0.0, "num_token_doc": 66.9958, "num_token_overlap": 14.6875, "num_token_query": 37.6232, "num_token_union": 65.6101, "num_word_context": 202.5102, "num_word_doc": 49.9913, "num_word_query": 28.1729, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5290.3863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3826, "query_norm": 1.3728, "queue_k_norm": 1.488, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.6232, "sent_len_1": 66.9958, "sent_len_max_0": 127.995, "sent_len_max_1": 188.5987, "stdk": 0.0492, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.3479, "doc_norm": 1.479, "encoder_q-embeddings": 4955.0615, "encoder_q-layer.0": 3306.8215, "encoder_q-layer.1": 3904.5796, "encoder_q-layer.10": 2311.4331, "encoder_q-layer.11": 6168.8804, "encoder_q-layer.2": 4610.3438, "encoder_q-layer.3": 5348.811, "encoder_q-layer.4": 5419.3647, "encoder_q-layer.5": 5943.3164, "encoder_q-layer.6": 6074.4209, "encoder_q-layer.7": 4949.5615, "encoder_q-layer.8": 3483.4539, "encoder_q-layer.9": 2513.8923, "epoch": 0.87, "inbatch_neg_score": 0.3814, "inbatch_pos_score": 1.0215, "learning_rate": 6e-06, "loss": 3.3479, "norm_diff": 0.1011, "norm_loss": 0.0, "num_token_doc": 66.9636, "num_token_overlap": 14.65, "num_token_query": 37.3598, "num_token_union": 65.436, "num_word_context": 202.2135, "num_word_doc": 49.9318, "num_word_query": 27.9797, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7074.5282, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3828, "query_norm": 1.378, "queue_k_norm": 1.488, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3598, "sent_len_1": 66.9636, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.185, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.3301, "doc_norm": 1.4836, "encoder_q-embeddings": 2532.9561, "encoder_q-layer.0": 1670.859, "encoder_q-layer.1": 1850.574, "encoder_q-layer.10": 2434.0044, "encoder_q-layer.11": 6328.3267, "encoder_q-layer.2": 2095.4988, "encoder_q-layer.3": 2245.4126, "encoder_q-layer.4": 2392.0093, "encoder_q-layer.5": 2588.8123, "encoder_q-layer.6": 2474.8423, "encoder_q-layer.7": 2514.2007, "encoder_q-layer.8": 2681.2173, "encoder_q-layer.9": 2297.1077, "epoch": 0.87, "inbatch_neg_score": 0.3839, "inbatch_pos_score": 1.0049, "learning_rate": 5.944444444444445e-06, "loss": 3.3301, "norm_diff": 0.1089, "norm_loss": 0.0, "num_token_doc": 66.6281, "num_token_overlap": 14.5615, "num_token_query": 37.4144, "num_token_union": 65.3656, "num_word_context": 202.2702, "num_word_doc": 49.7009, "num_word_query": 28.0058, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4445.9664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3848, "query_norm": 1.3747, "queue_k_norm": 1.4866, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4144, "sent_len_1": 66.6281, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3262, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3539, "doc_norm": 1.4884, "encoder_q-embeddings": 2921.4333, "encoder_q-layer.0": 1966.2618, "encoder_q-layer.1": 2143.0679, "encoder_q-layer.10": 2347.4087, "encoder_q-layer.11": 6456.1426, "encoder_q-layer.2": 2532.6711, "encoder_q-layer.3": 2632.0398, "encoder_q-layer.4": 3155.4924, "encoder_q-layer.5": 3055.1526, "encoder_q-layer.6": 3154.2195, "encoder_q-layer.7": 2775.9639, "encoder_q-layer.8": 2760.791, "encoder_q-layer.9": 2327.5627, "epoch": 0.87, "inbatch_neg_score": 0.3865, "inbatch_pos_score": 1.0449, "learning_rate": 5.888888888888889e-06, "loss": 3.3539, "norm_diff": 0.1034, "norm_loss": 0.0, "num_token_doc": 67.0534, "num_token_overlap": 14.5946, "num_token_query": 37.3779, "num_token_union": 65.5725, "num_word_context": 202.7093, "num_word_doc": 50.0282, "num_word_query": 27.9667, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4869.8377, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3857, "query_norm": 1.3851, "queue_k_norm": 1.4895, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3779, "sent_len_1": 67.0534, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.1463, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.3323, "doc_norm": 1.4877, "encoder_q-embeddings": 11443.7461, "encoder_q-layer.0": 7609.6953, "encoder_q-layer.1": 7830.04, "encoder_q-layer.10": 2351.6929, "encoder_q-layer.11": 6223.7163, "encoder_q-layer.2": 10110.3135, "encoder_q-layer.3": 11135.9639, "encoder_q-layer.4": 11406.918, "encoder_q-layer.5": 10863.4971, "encoder_q-layer.6": 10414.8418, "encoder_q-layer.7": 9886.1562, "encoder_q-layer.8": 7327.5991, "encoder_q-layer.9": 2908.1521, "epoch": 0.87, "inbatch_neg_score": 0.3885, "inbatch_pos_score": 1.0244, "learning_rate": 5.833333333333334e-06, "loss": 3.3323, "norm_diff": 0.1093, "norm_loss": 0.0, "num_token_doc": 66.7762, "num_token_overlap": 14.618, "num_token_query": 37.3504, "num_token_union": 65.3471, "num_word_context": 201.9721, "num_word_doc": 49.7934, "num_word_query": 27.9603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13672.261, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3889, "query_norm": 1.3784, "queue_k_norm": 1.4872, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3504, "sent_len_1": 66.7762, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.2225, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3347, "doc_norm": 1.4917, "encoder_q-embeddings": 2167.8301, "encoder_q-layer.0": 1476.3182, "encoder_q-layer.1": 1525.2437, "encoder_q-layer.10": 2367.9114, "encoder_q-layer.11": 6214.0327, "encoder_q-layer.2": 1673.9386, "encoder_q-layer.3": 1739.2678, "encoder_q-layer.4": 1820.9323, "encoder_q-layer.5": 1840.7057, "encoder_q-layer.6": 2102.3362, "encoder_q-layer.7": 2328.4814, "encoder_q-layer.8": 2678.7339, "encoder_q-layer.9": 2326.7568, "epoch": 0.87, "inbatch_neg_score": 0.3924, "inbatch_pos_score": 1.0518, "learning_rate": 5.777777777777778e-06, "loss": 3.3347, "norm_diff": 0.1095, "norm_loss": 0.0, "num_token_doc": 66.8394, "num_token_overlap": 14.587, "num_token_query": 37.4642, "num_token_union": 65.4833, "num_word_context": 202.3449, "num_word_doc": 49.8426, "num_word_query": 28.0688, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4044.2504, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.3821, "queue_k_norm": 1.4904, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4642, "sent_len_1": 66.8394, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.17, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3714, "doc_norm": 1.4884, "encoder_q-embeddings": 2412.4248, "encoder_q-layer.0": 1691.7111, "encoder_q-layer.1": 1813.0876, "encoder_q-layer.10": 2336.9905, "encoder_q-layer.11": 6196.4312, "encoder_q-layer.2": 2069.5613, "encoder_q-layer.3": 2053.3245, "encoder_q-layer.4": 2084.1396, "encoder_q-layer.5": 2099.425, "encoder_q-layer.6": 2257.9583, "encoder_q-layer.7": 2258.5281, "encoder_q-layer.8": 2578.3408, "encoder_q-layer.9": 2246.8018, "epoch": 0.88, "inbatch_neg_score": 0.3915, "inbatch_pos_score": 1.0508, "learning_rate": 5.722222222222223e-06, "loss": 3.3714, "norm_diff": 0.1091, "norm_loss": 0.0, "num_token_doc": 66.6412, "num_token_overlap": 14.6119, "num_token_query": 37.3662, "num_token_union": 65.3313, "num_word_context": 202.4534, "num_word_doc": 49.757, "num_word_query": 27.9747, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4262.2034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3923, "query_norm": 1.3793, "queue_k_norm": 1.4886, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3662, "sent_len_1": 66.6412, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.135, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3396, "doc_norm": 1.4877, "encoder_q-embeddings": 2911.7031, "encoder_q-layer.0": 1978.5221, "encoder_q-layer.1": 2065.8638, "encoder_q-layer.10": 2493.2322, "encoder_q-layer.11": 6419.6392, "encoder_q-layer.2": 2517.8635, "encoder_q-layer.3": 2664.4736, "encoder_q-layer.4": 2730.4275, "encoder_q-layer.5": 2823.7646, "encoder_q-layer.6": 2868.429, "encoder_q-layer.7": 3030.5588, "encoder_q-layer.8": 2943.1389, "encoder_q-layer.9": 2331.8716, "epoch": 0.88, "inbatch_neg_score": 0.3907, "inbatch_pos_score": 1.0264, "learning_rate": 5.666666666666667e-06, "loss": 3.3396, "norm_diff": 0.1218, "norm_loss": 0.0, "num_token_doc": 66.7418, "num_token_overlap": 14.5988, "num_token_query": 37.4636, "num_token_union": 65.4455, "num_word_context": 202.4627, "num_word_doc": 49.8463, "num_word_query": 28.0603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4800.153, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3909, "query_norm": 1.3659, "queue_k_norm": 1.4903, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4636, "sent_len_1": 66.7418, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.3862, "stdk": 0.0488, "stdq": 0.0437, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3605, "doc_norm": 1.4953, "encoder_q-embeddings": 3239.8567, "encoder_q-layer.0": 2096.9866, "encoder_q-layer.1": 2217.7773, "encoder_q-layer.10": 2328.4202, "encoder_q-layer.11": 6263.6709, "encoder_q-layer.2": 2693.3103, "encoder_q-layer.3": 2898.823, "encoder_q-layer.4": 3088.5535, "encoder_q-layer.5": 2709.9722, "encoder_q-layer.6": 2929.9819, "encoder_q-layer.7": 2970.6042, "encoder_q-layer.8": 3098.5547, "encoder_q-layer.9": 2282.1367, "epoch": 0.88, "inbatch_neg_score": 0.3913, "inbatch_pos_score": 1.0635, "learning_rate": 5.611111111111112e-06, "loss": 3.3605, "norm_diff": 0.1178, "norm_loss": 0.0, "num_token_doc": 66.7926, "num_token_overlap": 14.5551, "num_token_query": 37.2332, "num_token_union": 65.3708, "num_word_context": 202.7158, "num_word_doc": 49.87, "num_word_query": 27.8735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4919.7298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.3775, "queue_k_norm": 1.492, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2332, "sent_len_1": 66.7926, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.97, "stdk": 0.0491, "stdq": 0.0442, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.3673, "doc_norm": 1.4851, "encoder_q-embeddings": 2383.9204, "encoder_q-layer.0": 1677.4556, "encoder_q-layer.1": 1859.8394, "encoder_q-layer.10": 2616.1924, "encoder_q-layer.11": 6547.2607, "encoder_q-layer.2": 2168.3967, "encoder_q-layer.3": 2321.8025, "encoder_q-layer.4": 2632.7961, "encoder_q-layer.5": 2437.7344, "encoder_q-layer.6": 2176.1697, "encoder_q-layer.7": 2321.5327, "encoder_q-layer.8": 2629.4331, "encoder_q-layer.9": 2363.791, "epoch": 0.88, "inbatch_neg_score": 0.3928, "inbatch_pos_score": 1.0332, "learning_rate": 5.555555555555556e-06, "loss": 3.3673, "norm_diff": 0.1089, "norm_loss": 0.0, "num_token_doc": 66.7082, "num_token_overlap": 14.5436, "num_token_query": 37.1745, "num_token_union": 65.2466, "num_word_context": 201.842, "num_word_doc": 49.7876, "num_word_query": 27.8051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4472.5298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3921, "query_norm": 1.3763, "queue_k_norm": 1.4913, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1745, "sent_len_1": 66.7082, "sent_len_max_0": 127.99, "sent_len_max_1": 189.235, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 29.131, "dev_samples_per_second": 2.197, "dev_steps_per_second": 0.034, "epoch": 0.88, "step": 90000, "test_accuracy": 93.505859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.37590304017066956, "test_doc_norm": 1.4698147773742676, "test_inbatch_neg_score": 0.744731605052948, "test_inbatch_pos_score": 1.7084084749221802, "test_loss": 0.37590304017066956, "test_loss_align": 0.9874005317687988, "test_loss_unif": 3.694220781326294, "test_loss_unif_q@queue": 3.6942203044891357, "test_norm_diff": 0.018500367179512978, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3858965039253235, "test_query_norm": 1.4875659942626953, "test_queue_k_norm": 1.4911621809005737, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042973846197128296, "test_stdq": 0.04313674196600914, "test_stdqueue_k": 0.04900799319148064, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.131, "dev_samples_per_second": 2.197, "dev_steps_per_second": 0.034, "epoch": 0.88, "eval_beir-arguana_ndcg@10": 0.38888, "eval_beir-arguana_recall@10": 0.65789, "eval_beir-arguana_recall@100": 0.93812, "eval_beir-arguana_recall@20": 0.79232, "eval_beir-avg_ndcg@10": 0.38352458333333334, "eval_beir-avg_recall@10": 0.45274099999999995, "eval_beir-avg_recall@100": 0.6354398333333333, "eval_beir-avg_recall@20": 0.5120008333333333, "eval_beir-cqadupstack_ndcg@10": 0.27259583333333337, "eval_beir-cqadupstack_recall@10": 0.36774, "eval_beir-cqadupstack_recall@100": 0.5993283333333334, "eval_beir-cqadupstack_recall@20": 0.42936833333333335, "eval_beir-fiqa_ndcg@10": 0.23499, "eval_beir-fiqa_recall@10": 0.29078, "eval_beir-fiqa_recall@100": 0.56937, "eval_beir-fiqa_recall@20": 0.37072, "eval_beir-nfcorpus_ndcg@10": 0.31292, "eval_beir-nfcorpus_recall@10": 0.15408, "eval_beir-nfcorpus_recall@100": 0.29975, "eval_beir-nfcorpus_recall@20": 0.18434, "eval_beir-nq_ndcg@10": 0.26954, "eval_beir-nq_recall@10": 0.45259, "eval_beir-nq_recall@100": 0.7878, "eval_beir-nq_recall@20": 0.57047, "eval_beir-quora_ndcg@10": 0.77447, "eval_beir-quora_recall@10": 0.88554, "eval_beir-quora_recall@100": 0.97738, "eval_beir-quora_recall@20": 0.92961, "eval_beir-scidocs_ndcg@10": 0.14988, "eval_beir-scidocs_recall@10": 0.15502, "eval_beir-scidocs_recall@100": 0.36802, "eval_beir-scidocs_recall@20": 0.21122, "eval_beir-scifact_ndcg@10": 0.65792, "eval_beir-scifact_recall@10": 0.79633, "eval_beir-scifact_recall@100": 0.90822, "eval_beir-scifact_recall@20": 0.83311, "eval_beir-trec-covid_ndcg@10": 0.59394, "eval_beir-trec-covid_recall@10": 0.636, "eval_beir-trec-covid_recall@100": 0.4572, "eval_beir-trec-covid_recall@20": 0.594, "eval_beir-webis-touche2020_ndcg@10": 0.18011, "eval_beir-webis-touche2020_recall@10": 0.13144, "eval_beir-webis-touche2020_recall@100": 0.44921, "eval_beir-webis-touche2020_recall@20": 0.20485, "eval_senteval-avg_sts": 0.7450988863316512, "eval_senteval-sickr_spearman": 0.7154128331716759, "eval_senteval-stsb_spearman": 0.7747849394916264, "step": 90000, "test_accuracy": 93.505859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.37590304017066956, "test_doc_norm": 1.4698147773742676, "test_inbatch_neg_score": 0.744731605052948, "test_inbatch_pos_score": 1.7084084749221802, "test_loss": 0.37590304017066956, "test_loss_align": 0.9874005317687988, "test_loss_unif": 3.694220781326294, "test_loss_unif_q@queue": 3.6942203044891357, "test_norm_diff": 0.018500367179512978, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3858965039253235, "test_query_norm": 1.4875659942626953, "test_queue_k_norm": 1.4911621809005737, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042973846197128296, "test_stdq": 0.04313674196600914, "test_stdqueue_k": 0.04900799319148064, "test_stdqueue_q": 0.0 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3341, "doc_norm": 1.4941, "encoder_q-embeddings": 2430.8567, "encoder_q-layer.0": 1645.7274, "encoder_q-layer.1": 1786.2518, "encoder_q-layer.10": 2190.9971, "encoder_q-layer.11": 6030.8882, "encoder_q-layer.2": 2125.6946, "encoder_q-layer.3": 2109.9485, "encoder_q-layer.4": 2140.1772, "encoder_q-layer.5": 2147.0176, "encoder_q-layer.6": 2192.1738, "encoder_q-layer.7": 2448.0803, "encoder_q-layer.8": 2485.4902, "encoder_q-layer.9": 2189.9937, "epoch": 0.88, "inbatch_neg_score": 0.3911, "inbatch_pos_score": 1.0469, "learning_rate": 5.500000000000001e-06, "loss": 3.3341, "norm_diff": 0.1244, "norm_loss": 0.0, "num_token_doc": 66.7301, "num_token_overlap": 14.6052, "num_token_query": 37.5199, "num_token_union": 65.4639, "num_word_context": 202.3118, "num_word_doc": 49.7576, "num_word_query": 28.094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4199.2557, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.3696, "queue_k_norm": 1.4914, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5199, "sent_len_1": 66.7301, "sent_len_max_0": 127.9825, "sent_len_max_1": 189.9387, "stdk": 0.049, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3386, "doc_norm": 1.4841, "encoder_q-embeddings": 2994.4985, "encoder_q-layer.0": 2074.3684, "encoder_q-layer.1": 2307.8623, "encoder_q-layer.10": 2402.0176, "encoder_q-layer.11": 6458.6836, "encoder_q-layer.2": 2683.6045, "encoder_q-layer.3": 2778.9226, "encoder_q-layer.4": 2841.2134, "encoder_q-layer.5": 2860.9087, "encoder_q-layer.6": 2918.011, "encoder_q-layer.7": 2656.9619, "encoder_q-layer.8": 2787.4036, "encoder_q-layer.9": 2296.5168, "epoch": 0.88, "inbatch_neg_score": 0.3911, "inbatch_pos_score": 1.042, "learning_rate": 5.444444444444445e-06, "loss": 3.3386, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.8988, "num_token_overlap": 14.5297, "num_token_query": 37.2084, "num_token_union": 65.4047, "num_word_context": 202.4232, "num_word_doc": 49.8878, "num_word_query": 27.8339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4835.5963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.3692, "queue_k_norm": 1.4923, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2084, "sent_len_1": 66.8988, "sent_len_max_0": 128.0, "sent_len_max_1": 193.0213, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3202, "doc_norm": 1.4964, "encoder_q-embeddings": 2537.4084, "encoder_q-layer.0": 1697.4312, "encoder_q-layer.1": 1785.3972, "encoder_q-layer.10": 2529.5005, "encoder_q-layer.11": 6853.6782, "encoder_q-layer.2": 2042.5862, "encoder_q-layer.3": 2149.1025, "encoder_q-layer.4": 2374.6919, "encoder_q-layer.5": 2434.958, "encoder_q-layer.6": 2433.8262, "encoder_q-layer.7": 2698.1152, "encoder_q-layer.8": 2837.6348, "encoder_q-layer.9": 2468.0483, "epoch": 0.88, "inbatch_neg_score": 0.3931, "inbatch_pos_score": 1.0381, "learning_rate": 5.388888888888889e-06, "loss": 3.3202, "norm_diff": 0.1157, "norm_loss": 0.0, "num_token_doc": 67.0763, "num_token_overlap": 14.6563, "num_token_query": 37.329, "num_token_union": 65.5017, "num_word_context": 202.3952, "num_word_doc": 50.0811, "num_word_query": 27.9199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4556.7899, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3936, "query_norm": 1.3807, "queue_k_norm": 1.4915, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.329, "sent_len_1": 67.0763, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5213, "stdk": 0.0491, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.33, "doc_norm": 1.4861, "encoder_q-embeddings": 2115.9583, "encoder_q-layer.0": 1421.0184, "encoder_q-layer.1": 1547.5583, "encoder_q-layer.10": 2204.603, "encoder_q-layer.11": 6238.916, "encoder_q-layer.2": 1712.2477, "encoder_q-layer.3": 1782.0778, "encoder_q-layer.4": 1865.7056, "encoder_q-layer.5": 1908.36, "encoder_q-layer.6": 1901.7152, "encoder_q-layer.7": 2088.7852, "encoder_q-layer.8": 2341.7837, "encoder_q-layer.9": 2196.0994, "epoch": 0.88, "inbatch_neg_score": 0.3902, "inbatch_pos_score": 1.0264, "learning_rate": 5.333333333333334e-06, "loss": 3.33, "norm_diff": 0.1249, "norm_loss": 0.0, "num_token_doc": 66.8473, "num_token_overlap": 14.6717, "num_token_query": 37.429, "num_token_union": 65.3965, "num_word_context": 202.055, "num_word_doc": 49.8921, "num_word_query": 28.0442, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4055.7645, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3906, "query_norm": 1.3612, "queue_k_norm": 1.4902, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.429, "sent_len_1": 66.8473, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.71, "stdk": 0.0487, "stdq": 0.0436, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.3409, "doc_norm": 1.4935, "encoder_q-embeddings": 2456.5774, "encoder_q-layer.0": 1696.262, "encoder_q-layer.1": 1795.5187, "encoder_q-layer.10": 2336.6248, "encoder_q-layer.11": 6303.2285, "encoder_q-layer.2": 2020.6865, "encoder_q-layer.3": 2052.5122, "encoder_q-layer.4": 2191.4592, "encoder_q-layer.5": 2308.426, "encoder_q-layer.6": 2510.4346, "encoder_q-layer.7": 2340.0481, "encoder_q-layer.8": 2598.9839, "encoder_q-layer.9": 2234.7031, "epoch": 0.88, "inbatch_neg_score": 0.3939, "inbatch_pos_score": 1.0752, "learning_rate": 5.277777777777778e-06, "loss": 3.3409, "norm_diff": 0.108, "norm_loss": 0.0, "num_token_doc": 66.6073, "num_token_overlap": 14.6326, "num_token_query": 37.4238, "num_token_union": 65.3245, "num_word_context": 202.142, "num_word_doc": 49.7068, "num_word_query": 28.0145, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4292.2113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3928, "query_norm": 1.3855, "queue_k_norm": 1.4927, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4238, "sent_len_1": 66.6073, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.9588, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.354, "doc_norm": 1.4873, "encoder_q-embeddings": 994.9054, "encoder_q-layer.0": 700.0725, "encoder_q-layer.1": 718.4558, "encoder_q-layer.10": 1308.9569, "encoder_q-layer.11": 3216.4675, "encoder_q-layer.2": 827.7678, "encoder_q-layer.3": 843.7044, "encoder_q-layer.4": 915.5032, "encoder_q-layer.5": 914.4794, "encoder_q-layer.6": 981.3621, "encoder_q-layer.7": 1107.7577, "encoder_q-layer.8": 1303.6514, "encoder_q-layer.9": 1222.1365, "epoch": 0.88, "inbatch_neg_score": 0.3947, "inbatch_pos_score": 1.0527, "learning_rate": 5.2222222222222226e-06, "loss": 3.354, "norm_diff": 0.1169, "norm_loss": 0.0, "num_token_doc": 66.6166, "num_token_overlap": 14.5481, "num_token_query": 37.2496, "num_token_union": 65.1816, "num_word_context": 201.8384, "num_word_doc": 49.6838, "num_word_query": 27.8555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2020.0453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3948, "query_norm": 1.3705, "queue_k_norm": 1.4912, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2496, "sent_len_1": 66.6166, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2475, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3458, "doc_norm": 1.4894, "encoder_q-embeddings": 3762.5769, "encoder_q-layer.0": 2693.7083, "encoder_q-layer.1": 3099.5259, "encoder_q-layer.10": 1174.6902, "encoder_q-layer.11": 3310.0146, "encoder_q-layer.2": 3200.9646, "encoder_q-layer.3": 3527.8071, "encoder_q-layer.4": 2987.7227, "encoder_q-layer.5": 2242.1992, "encoder_q-layer.6": 2329.5405, "encoder_q-layer.7": 2076.3389, "encoder_q-layer.8": 1792.2323, "encoder_q-layer.9": 1177.7281, "epoch": 0.89, "inbatch_neg_score": 0.393, "inbatch_pos_score": 1.0322, "learning_rate": 5.166666666666667e-06, "loss": 3.3458, "norm_diff": 0.1269, "norm_loss": 0.0, "num_token_doc": 66.7748, "num_token_overlap": 14.6821, "num_token_query": 37.5817, "num_token_union": 65.457, "num_word_context": 202.343, "num_word_doc": 49.8411, "num_word_query": 28.1459, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4224.0961, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3923, "query_norm": 1.3625, "queue_k_norm": 1.4914, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5817, "sent_len_1": 66.7748, "sent_len_max_0": 127.9688, "sent_len_max_1": 187.7587, "stdk": 0.0488, "stdq": 0.0437, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3575, "doc_norm": 1.4845, "encoder_q-embeddings": 1109.9863, "encoder_q-layer.0": 764.5271, "encoder_q-layer.1": 797.3597, "encoder_q-layer.10": 1278.2908, "encoder_q-layer.11": 3254.011, "encoder_q-layer.2": 925.5074, "encoder_q-layer.3": 901.7659, "encoder_q-layer.4": 972.3937, "encoder_q-layer.5": 939.2927, "encoder_q-layer.6": 1113.1588, "encoder_q-layer.7": 1236.5062, "encoder_q-layer.8": 1337.2295, "encoder_q-layer.9": 1174.4652, "epoch": 0.89, "inbatch_neg_score": 0.3914, "inbatch_pos_score": 1.0391, "learning_rate": 5.1111111111111115e-06, "loss": 3.3575, "norm_diff": 0.1093, "norm_loss": 0.0, "num_token_doc": 66.7415, "num_token_overlap": 14.5485, "num_token_query": 37.3186, "num_token_union": 65.3814, "num_word_context": 202.4602, "num_word_doc": 49.8, "num_word_query": 27.9199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2117.107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3918, "query_norm": 1.3752, "queue_k_norm": 1.4914, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3186, "sent_len_1": 66.7415, "sent_len_max_0": 127.96, "sent_len_max_1": 189.3988, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.359, "doc_norm": 1.4915, "encoder_q-embeddings": 2796.8455, "encoder_q-layer.0": 2042.2026, "encoder_q-layer.1": 2338.8975, "encoder_q-layer.10": 1226.9978, "encoder_q-layer.11": 3226.6687, "encoder_q-layer.2": 2895.2053, "encoder_q-layer.3": 2936.7424, "encoder_q-layer.4": 2946.6807, "encoder_q-layer.5": 2913.6318, "encoder_q-layer.6": 2698.1025, "encoder_q-layer.7": 2394.8181, "encoder_q-layer.8": 1972.7404, "encoder_q-layer.9": 1267.0061, "epoch": 0.89, "inbatch_neg_score": 0.3935, "inbatch_pos_score": 1.0547, "learning_rate": 5.0555555555555555e-06, "loss": 3.359, "norm_diff": 0.1175, "norm_loss": 0.0, "num_token_doc": 67.0798, "num_token_overlap": 14.578, "num_token_query": 37.1434, "num_token_union": 65.4569, "num_word_context": 202.4225, "num_word_doc": 50.0223, "num_word_query": 27.7771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3885.5157, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3938, "query_norm": 1.374, "queue_k_norm": 1.4932, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1434, "sent_len_1": 67.0798, "sent_len_max_0": 127.9875, "sent_len_max_1": 191.9688, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3313, "doc_norm": 1.4935, "encoder_q-embeddings": 1725.2888, "encoder_q-layer.0": 1196.2113, "encoder_q-layer.1": 1312.358, "encoder_q-layer.10": 1355.1344, "encoder_q-layer.11": 3278.3572, "encoder_q-layer.2": 1633.0886, "encoder_q-layer.3": 1848.4236, "encoder_q-layer.4": 2004.0773, "encoder_q-layer.5": 1953.8942, "encoder_q-layer.6": 1996.1444, "encoder_q-layer.7": 1942.8604, "encoder_q-layer.8": 1654.5986, "encoder_q-layer.9": 1149.8264, "epoch": 0.89, "inbatch_neg_score": 0.3952, "inbatch_pos_score": 1.0566, "learning_rate": 5e-06, "loss": 3.3313, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.8259, "num_token_overlap": 14.6133, "num_token_query": 37.3661, "num_token_union": 65.4483, "num_word_context": 202.6209, "num_word_doc": 49.9085, "num_word_query": 27.9707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2826.0304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3953, "query_norm": 1.3776, "queue_k_norm": 1.493, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3661, "sent_len_1": 66.8259, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.1037, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3456, "doc_norm": 1.4919, "encoder_q-embeddings": 1051.199, "encoder_q-layer.0": 728.3491, "encoder_q-layer.1": 776.8362, "encoder_q-layer.10": 1115.9585, "encoder_q-layer.11": 3126.9702, "encoder_q-layer.2": 897.3857, "encoder_q-layer.3": 902.2819, "encoder_q-layer.4": 935.7224, "encoder_q-layer.5": 934.201, "encoder_q-layer.6": 1010.3518, "encoder_q-layer.7": 1141.7424, "encoder_q-layer.8": 1248.3632, "encoder_q-layer.9": 1134.5609, "epoch": 0.89, "inbatch_neg_score": 0.3897, "inbatch_pos_score": 1.0605, "learning_rate": 4.9444444444444444e-06, "loss": 3.3456, "norm_diff": 0.1218, "norm_loss": 0.0, "num_token_doc": 66.9868, "num_token_overlap": 14.5759, "num_token_query": 37.2854, "num_token_union": 65.4831, "num_word_context": 202.5729, "num_word_doc": 50.013, "num_word_query": 27.9105, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2054.8761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3906, "query_norm": 1.3701, "queue_k_norm": 1.4937, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2854, "sent_len_1": 66.9868, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1475, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3403, "doc_norm": 1.4943, "encoder_q-embeddings": 2546.1748, "encoder_q-layer.0": 1795.0187, "encoder_q-layer.1": 2036.4364, "encoder_q-layer.10": 1246.1511, "encoder_q-layer.11": 3210.3894, "encoder_q-layer.2": 2159.3608, "encoder_q-layer.3": 2225.4387, "encoder_q-layer.4": 2400.5425, "encoder_q-layer.5": 2301.4978, "encoder_q-layer.6": 2217.9465, "encoder_q-layer.7": 2005.8682, "encoder_q-layer.8": 1575.4438, "encoder_q-layer.9": 1138.3322, "epoch": 0.89, "inbatch_neg_score": 0.3913, "inbatch_pos_score": 1.0605, "learning_rate": 4.888888888888889e-06, "loss": 3.3403, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.7173, "num_token_overlap": 14.6436, "num_token_query": 37.4491, "num_token_union": 65.3705, "num_word_context": 202.1957, "num_word_doc": 49.7797, "num_word_query": 28.0523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3275.9806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3916, "query_norm": 1.3766, "queue_k_norm": 1.4927, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4491, "sent_len_1": 66.7173, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.0838, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3525, "doc_norm": 1.4949, "encoder_q-embeddings": 1034.6771, "encoder_q-layer.0": 721.3099, "encoder_q-layer.1": 746.355, "encoder_q-layer.10": 1158.3811, "encoder_q-layer.11": 3067.9814, "encoder_q-layer.2": 854.9869, "encoder_q-layer.3": 884.0182, "encoder_q-layer.4": 924.1715, "encoder_q-layer.5": 966.7853, "encoder_q-layer.6": 1097.4391, "encoder_q-layer.7": 1168.2186, "encoder_q-layer.8": 1328.2039, "encoder_q-layer.9": 1141.2762, "epoch": 0.89, "inbatch_neg_score": 0.3918, "inbatch_pos_score": 1.0557, "learning_rate": 4.833333333333333e-06, "loss": 3.3525, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.9493, "num_token_overlap": 14.5961, "num_token_query": 37.4946, "num_token_union": 65.5389, "num_word_context": 202.5993, "num_word_doc": 49.9232, "num_word_query": 28.0986, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2000.5936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3921, "query_norm": 1.3772, "queue_k_norm": 1.492, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4946, "sent_len_1": 66.9493, "sent_len_max_0": 127.99, "sent_len_max_1": 188.8413, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3372, "doc_norm": 1.4904, "encoder_q-embeddings": 1319.1937, "encoder_q-layer.0": 860.5372, "encoder_q-layer.1": 953.1278, "encoder_q-layer.10": 1145.6586, "encoder_q-layer.11": 3152.3828, "encoder_q-layer.2": 1071.4285, "encoder_q-layer.3": 1094.7479, "encoder_q-layer.4": 1108.254, "encoder_q-layer.5": 1131.2886, "encoder_q-layer.6": 1177.8146, "encoder_q-layer.7": 1300.8606, "encoder_q-layer.8": 1360.155, "encoder_q-layer.9": 1174.286, "epoch": 0.89, "inbatch_neg_score": 0.3927, "inbatch_pos_score": 1.0674, "learning_rate": 4.777777777777778e-06, "loss": 3.3372, "norm_diff": 0.0996, "norm_loss": 0.0, "num_token_doc": 66.6946, "num_token_overlap": 14.5924, "num_token_query": 37.4, "num_token_union": 65.3621, "num_word_context": 202.2354, "num_word_doc": 49.7647, "num_word_query": 27.9946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2213.615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3923, "query_norm": 1.3908, "queue_k_norm": 1.4927, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4, "sent_len_1": 66.6946, "sent_len_max_0": 127.985, "sent_len_max_1": 190.2025, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3418, "doc_norm": 1.4933, "encoder_q-embeddings": 1747.2999, "encoder_q-layer.0": 1244.8336, "encoder_q-layer.1": 1302.7651, "encoder_q-layer.10": 1354.4316, "encoder_q-layer.11": 3200.4768, "encoder_q-layer.2": 1345.6189, "encoder_q-layer.3": 1416.9979, "encoder_q-layer.4": 1446.024, "encoder_q-layer.5": 1404.4954, "encoder_q-layer.6": 1487.9723, "encoder_q-layer.7": 1539.9307, "encoder_q-layer.8": 1665.5065, "encoder_q-layer.9": 1258.9938, "epoch": 0.89, "inbatch_neg_score": 0.3912, "inbatch_pos_score": 1.0312, "learning_rate": 4.722222222222222e-06, "loss": 3.3418, "norm_diff": 0.1202, "norm_loss": 0.0, "num_token_doc": 66.8131, "num_token_overlap": 14.6272, "num_token_query": 37.5329, "num_token_union": 65.482, "num_word_context": 202.4994, "num_word_doc": 49.835, "num_word_query": 28.1004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2528.976, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.3731, "queue_k_norm": 1.4937, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5329, "sent_len_1": 66.8131, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4913, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3371, "doc_norm": 1.4974, "encoder_q-embeddings": 1410.4098, "encoder_q-layer.0": 962.4512, "encoder_q-layer.1": 1052.6058, "encoder_q-layer.10": 1198.0422, "encoder_q-layer.11": 3061.8452, "encoder_q-layer.2": 1183.942, "encoder_q-layer.3": 1232.0117, "encoder_q-layer.4": 1288.0597, "encoder_q-layer.5": 1440.9746, "encoder_q-layer.6": 1601.467, "encoder_q-layer.7": 1521.4575, "encoder_q-layer.8": 1427.7191, "encoder_q-layer.9": 1105.684, "epoch": 0.89, "inbatch_neg_score": 0.3915, "inbatch_pos_score": 1.04, "learning_rate": 4.666666666666667e-06, "loss": 3.3371, "norm_diff": 0.1209, "norm_loss": 0.0, "num_token_doc": 66.9827, "num_token_overlap": 14.6728, "num_token_query": 37.586, "num_token_union": 65.5673, "num_word_context": 202.442, "num_word_doc": 49.9824, "num_word_query": 28.1563, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2322.7253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3909, "query_norm": 1.3765, "queue_k_norm": 1.4936, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.586, "sent_len_1": 66.9827, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0588, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3556, "doc_norm": 1.4956, "encoder_q-embeddings": 972.5717, "encoder_q-layer.0": 645.1985, "encoder_q-layer.1": 660.1946, "encoder_q-layer.10": 1188.2092, "encoder_q-layer.11": 3238.2046, "encoder_q-layer.2": 748.7919, "encoder_q-layer.3": 748.8188, "encoder_q-layer.4": 815.623, "encoder_q-layer.5": 825.2228, "encoder_q-layer.6": 915.9717, "encoder_q-layer.7": 1040.7036, "encoder_q-layer.8": 1299.7191, "encoder_q-layer.9": 1144.095, "epoch": 0.9, "inbatch_neg_score": 0.3932, "inbatch_pos_score": 1.0508, "learning_rate": 4.611111111111111e-06, "loss": 3.3556, "norm_diff": 0.1261, "norm_loss": 0.0, "num_token_doc": 66.8641, "num_token_overlap": 14.5458, "num_token_query": 37.2555, "num_token_union": 65.4555, "num_word_context": 202.3428, "num_word_doc": 49.8956, "num_word_query": 27.8974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1978.5147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3911, "query_norm": 1.3695, "queue_k_norm": 1.4922, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2555, "sent_len_1": 66.8641, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3663, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3323, "doc_norm": 1.4944, "encoder_q-embeddings": 1582.9178, "encoder_q-layer.0": 1177.2084, "encoder_q-layer.1": 1386.5914, "encoder_q-layer.10": 1167.2587, "encoder_q-layer.11": 2910.2114, "encoder_q-layer.2": 1758.5852, "encoder_q-layer.3": 1894.401, "encoder_q-layer.4": 2027.1627, "encoder_q-layer.5": 2156.032, "encoder_q-layer.6": 2185.7717, "encoder_q-layer.7": 2430.6113, "encoder_q-layer.8": 2002.7385, "encoder_q-layer.9": 1124.126, "epoch": 0.9, "inbatch_neg_score": 0.392, "inbatch_pos_score": 1.0684, "learning_rate": 4.555555555555556e-06, "loss": 3.3323, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.8381, "num_token_overlap": 14.6305, "num_token_query": 37.4317, "num_token_union": 65.4426, "num_word_context": 202.2976, "num_word_doc": 49.8793, "num_word_query": 28.0415, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2882.1296, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3916, "query_norm": 1.3795, "queue_k_norm": 1.4928, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4317, "sent_len_1": 66.8381, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.7375, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3506, "doc_norm": 1.4899, "encoder_q-embeddings": 1097.6045, "encoder_q-layer.0": 751.3766, "encoder_q-layer.1": 837.272, "encoder_q-layer.10": 1134.403, "encoder_q-layer.11": 3031.188, "encoder_q-layer.2": 944.2891, "encoder_q-layer.3": 959.9269, "encoder_q-layer.4": 999.7779, "encoder_q-layer.5": 1026.3434, "encoder_q-layer.6": 1053.4425, "encoder_q-layer.7": 1202.3418, "encoder_q-layer.8": 1341.8804, "encoder_q-layer.9": 1113.1045, "epoch": 0.9, "inbatch_neg_score": 0.3922, "inbatch_pos_score": 1.0498, "learning_rate": 4.5e-06, "loss": 3.3506, "norm_diff": 0.1227, "norm_loss": 0.0, "num_token_doc": 66.614, "num_token_overlap": 14.556, "num_token_query": 37.2853, "num_token_union": 65.2599, "num_word_context": 202.0528, "num_word_doc": 49.7027, "num_word_query": 27.9298, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.9604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3918, "query_norm": 1.3672, "queue_k_norm": 1.4922, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2853, "sent_len_1": 66.614, "sent_len_max_0": 127.995, "sent_len_max_1": 191.41, "stdk": 0.0488, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3575, "doc_norm": 1.4968, "encoder_q-embeddings": 1409.2521, "encoder_q-layer.0": 995.7415, "encoder_q-layer.1": 1057.1174, "encoder_q-layer.10": 1167.2775, "encoder_q-layer.11": 3126.4663, "encoder_q-layer.2": 1234.7721, "encoder_q-layer.3": 1324.1406, "encoder_q-layer.4": 1463.5388, "encoder_q-layer.5": 1403.6486, "encoder_q-layer.6": 1550.5858, "encoder_q-layer.7": 1782.9128, "encoder_q-layer.8": 1639.8423, "encoder_q-layer.9": 1161.3531, "epoch": 0.9, "inbatch_neg_score": 0.3931, "inbatch_pos_score": 1.0684, "learning_rate": 4.444444444444445e-06, "loss": 3.3575, "norm_diff": 0.1145, "norm_loss": 0.0, "num_token_doc": 66.8057, "num_token_overlap": 14.5711, "num_token_query": 37.2636, "num_token_union": 65.3438, "num_word_context": 202.087, "num_word_doc": 49.8146, "num_word_query": 27.8954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2425.6999, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3928, "query_norm": 1.3822, "queue_k_norm": 1.4909, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2636, "sent_len_1": 66.8057, "sent_len_max_0": 127.9613, "sent_len_max_1": 192.52, "stdk": 0.049, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3399, "doc_norm": 1.4963, "encoder_q-embeddings": 1452.8682, "encoder_q-layer.0": 993.2718, "encoder_q-layer.1": 1073.0413, "encoder_q-layer.10": 1189.5715, "encoder_q-layer.11": 3228.199, "encoder_q-layer.2": 1209.5497, "encoder_q-layer.3": 1211.2465, "encoder_q-layer.4": 1312.5424, "encoder_q-layer.5": 1308.6597, "encoder_q-layer.6": 1325.8196, "encoder_q-layer.7": 1217.8223, "encoder_q-layer.8": 1270.8544, "encoder_q-layer.9": 1150.1359, "epoch": 0.9, "inbatch_neg_score": 0.3917, "inbatch_pos_score": 1.0332, "learning_rate": 4.388888888888889e-06, "loss": 3.3399, "norm_diff": 0.1269, "norm_loss": 0.0, "num_token_doc": 66.7795, "num_token_overlap": 14.6872, "num_token_query": 37.5595, "num_token_union": 65.4515, "num_word_context": 202.2379, "num_word_doc": 49.8112, "num_word_query": 28.1458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2331.8028, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3928, "query_norm": 1.3694, "queue_k_norm": 1.4924, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.5595, "sent_len_1": 66.7795, "sent_len_max_0": 128.0, "sent_len_max_1": 188.77, "stdk": 0.049, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3598, "doc_norm": 1.4882, "encoder_q-embeddings": 1062.418, "encoder_q-layer.0": 730.7213, "encoder_q-layer.1": 761.6741, "encoder_q-layer.10": 1177.877, "encoder_q-layer.11": 3273.1501, "encoder_q-layer.2": 867.4748, "encoder_q-layer.3": 874.6203, "encoder_q-layer.4": 889.1656, "encoder_q-layer.5": 891.8492, "encoder_q-layer.6": 985.8982, "encoder_q-layer.7": 1144.2922, "encoder_q-layer.8": 1269.6085, "encoder_q-layer.9": 1124.6951, "epoch": 0.9, "inbatch_neg_score": 0.3949, "inbatch_pos_score": 1.0459, "learning_rate": 4.333333333333334e-06, "loss": 3.3598, "norm_diff": 0.1152, "norm_loss": 0.0, "num_token_doc": 66.7679, "num_token_overlap": 14.5428, "num_token_query": 37.1312, "num_token_union": 65.3106, "num_word_context": 201.8944, "num_word_doc": 49.7859, "num_word_query": 27.791, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2117.4131, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.394, "query_norm": 1.373, "queue_k_norm": 1.4931, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1312, "sent_len_1": 66.7679, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5337, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.3461, "doc_norm": 1.4913, "encoder_q-embeddings": 1077.717, "encoder_q-layer.0": 755.7248, "encoder_q-layer.1": 804.0566, "encoder_q-layer.10": 1201.8018, "encoder_q-layer.11": 3249.4346, "encoder_q-layer.2": 924.5461, "encoder_q-layer.3": 943.1133, "encoder_q-layer.4": 936.3271, "encoder_q-layer.5": 916.8629, "encoder_q-layer.6": 1006.7883, "encoder_q-layer.7": 1083.6483, "encoder_q-layer.8": 1269.0604, "encoder_q-layer.9": 1169.2883, "epoch": 0.9, "inbatch_neg_score": 0.3941, "inbatch_pos_score": 1.0459, "learning_rate": 4.277777777777778e-06, "loss": 3.3461, "norm_diff": 0.1156, "norm_loss": 0.0, "num_token_doc": 66.6629, "num_token_overlap": 14.6002, "num_token_query": 37.2865, "num_token_union": 65.2657, "num_word_context": 202.1729, "num_word_doc": 49.7216, "num_word_query": 27.9286, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2091.4091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3936, "query_norm": 1.3758, "queue_k_norm": 1.4917, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.2865, "sent_len_1": 66.6629, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2775, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.341, "doc_norm": 1.4875, "encoder_q-embeddings": 1199.9553, "encoder_q-layer.0": 834.3575, "encoder_q-layer.1": 915.0661, "encoder_q-layer.10": 1215.9924, "encoder_q-layer.11": 3212.8093, "encoder_q-layer.2": 1046.0143, "encoder_q-layer.3": 1103.5939, "encoder_q-layer.4": 1180.3967, "encoder_q-layer.5": 1184.7517, "encoder_q-layer.6": 1369.4922, "encoder_q-layer.7": 1432.2471, "encoder_q-layer.8": 1404.9822, "encoder_q-layer.9": 1106.6316, "epoch": 0.9, "inbatch_neg_score": 0.3938, "inbatch_pos_score": 1.0352, "learning_rate": 4.222222222222223e-06, "loss": 3.341, "norm_diff": 0.1196, "norm_loss": 0.0, "num_token_doc": 66.7822, "num_token_overlap": 14.6171, "num_token_query": 37.3235, "num_token_union": 65.3325, "num_word_context": 202.4177, "num_word_doc": 49.8232, "num_word_query": 27.9371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2225.3494, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.394, "query_norm": 1.3679, "queue_k_norm": 1.4931, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3235, "sent_len_1": 66.7822, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1337, "stdk": 0.0486, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.3437, "doc_norm": 1.4872, "encoder_q-embeddings": 1086.1976, "encoder_q-layer.0": 733.0726, "encoder_q-layer.1": 762.899, "encoder_q-layer.10": 1225.455, "encoder_q-layer.11": 3336.542, "encoder_q-layer.2": 851.8446, "encoder_q-layer.3": 868.9814, "encoder_q-layer.4": 902.0851, "encoder_q-layer.5": 887.7584, "encoder_q-layer.6": 995.3715, "encoder_q-layer.7": 1200.3831, "encoder_q-layer.8": 1313.3875, "encoder_q-layer.9": 1134.2811, "epoch": 0.9, "inbatch_neg_score": 0.3949, "inbatch_pos_score": 1.0146, "learning_rate": 4.166666666666667e-06, "loss": 3.3437, "norm_diff": 0.1263, "norm_loss": 0.0, "num_token_doc": 66.5174, "num_token_overlap": 14.5809, "num_token_query": 37.2946, "num_token_union": 65.23, "num_word_context": 201.9686, "num_word_doc": 49.6842, "num_word_query": 27.9124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2140.9607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3955, "query_norm": 1.3609, "queue_k_norm": 1.491, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2946, "sent_len_1": 66.5174, "sent_len_max_0": 128.0, "sent_len_max_1": 187.495, "stdk": 0.0487, "stdq": 0.0437, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.3457, "doc_norm": 1.4951, "encoder_q-embeddings": 2079.2664, "encoder_q-layer.0": 1393.4679, "encoder_q-layer.1": 1477.8352, "encoder_q-layer.10": 2318.2593, "encoder_q-layer.11": 6472.9341, "encoder_q-layer.2": 1686.4695, "encoder_q-layer.3": 1722.7112, "encoder_q-layer.4": 1910.0801, "encoder_q-layer.5": 1964.2041, "encoder_q-layer.6": 2168.915, "encoder_q-layer.7": 2429.5122, "encoder_q-layer.8": 2599.4958, "encoder_q-layer.9": 2229.0017, "epoch": 0.9, "inbatch_neg_score": 0.391, "inbatch_pos_score": 1.0742, "learning_rate": 4.111111111111112e-06, "loss": 3.3457, "norm_diff": 0.117, "norm_loss": 0.0, "num_token_doc": 66.7654, "num_token_overlap": 14.5324, "num_token_query": 37.2064, "num_token_union": 65.3508, "num_word_context": 201.9717, "num_word_doc": 49.8099, "num_word_query": 27.8608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4164.5433, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3916, "query_norm": 1.3781, "queue_k_norm": 1.4918, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2064, "sent_len_1": 66.7654, "sent_len_max_0": 127.9925, "sent_len_max_1": 193.1025, "stdk": 0.049, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3261, "doc_norm": 1.4966, "encoder_q-embeddings": 3872.2314, "encoder_q-layer.0": 2822.1062, "encoder_q-layer.1": 2942.9968, "encoder_q-layer.10": 2626.8721, "encoder_q-layer.11": 6592.8545, "encoder_q-layer.2": 3346.3931, "encoder_q-layer.3": 3416.1528, "encoder_q-layer.4": 3646.6858, "encoder_q-layer.5": 3624.9983, "encoder_q-layer.6": 3440.5093, "encoder_q-layer.7": 4390.8501, "encoder_q-layer.8": 3598.272, "encoder_q-layer.9": 2445.8887, "epoch": 0.91, "inbatch_neg_score": 0.3918, "inbatch_pos_score": 1.0205, "learning_rate": 4.055555555555556e-06, "loss": 3.3261, "norm_diff": 0.1302, "norm_loss": 0.0, "num_token_doc": 66.7004, "num_token_overlap": 14.6466, "num_token_query": 37.4154, "num_token_union": 65.3742, "num_word_context": 202.0248, "num_word_doc": 49.7506, "num_word_query": 27.9943, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5722.3557, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3916, "query_norm": 1.3663, "queue_k_norm": 1.4916, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4154, "sent_len_1": 66.7004, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3162, "stdk": 0.049, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3149, "doc_norm": 1.4923, "encoder_q-embeddings": 3918.3708, "encoder_q-layer.0": 2758.8691, "encoder_q-layer.1": 2997.9692, "encoder_q-layer.10": 2614.6914, "encoder_q-layer.11": 6712.5015, "encoder_q-layer.2": 3264.7185, "encoder_q-layer.3": 3343.3962, "encoder_q-layer.4": 3393.3552, "encoder_q-layer.5": 3224.6562, "encoder_q-layer.6": 3570.863, "encoder_q-layer.7": 3450.0066, "encoder_q-layer.8": 3306.5166, "encoder_q-layer.9": 2410.7571, "epoch": 0.91, "inbatch_neg_score": 0.3927, "inbatch_pos_score": 1.04, "learning_rate": 4.000000000000001e-06, "loss": 3.3149, "norm_diff": 0.1273, "norm_loss": 0.0, "num_token_doc": 66.862, "num_token_overlap": 14.6849, "num_token_query": 37.5804, "num_token_union": 65.48, "num_word_context": 202.3513, "num_word_doc": 49.8899, "num_word_query": 28.1501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5650.501, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3928, "query_norm": 1.365, "queue_k_norm": 1.4935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5804, "sent_len_1": 66.862, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0312, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.3413, "doc_norm": 1.4918, "encoder_q-embeddings": 2254.3601, "encoder_q-layer.0": 1511.6279, "encoder_q-layer.1": 1625.9086, "encoder_q-layer.10": 2365.8616, "encoder_q-layer.11": 6635.439, "encoder_q-layer.2": 1836.9818, "encoder_q-layer.3": 1853.24, "encoder_q-layer.4": 2056.6606, "encoder_q-layer.5": 2071.0513, "encoder_q-layer.6": 2149.7427, "encoder_q-layer.7": 2293.6057, "encoder_q-layer.8": 2555.7598, "encoder_q-layer.9": 2312.6743, "epoch": 0.91, "inbatch_neg_score": 0.3917, "inbatch_pos_score": 1.0469, "learning_rate": 3.944444444444445e-06, "loss": 3.3413, "norm_diff": 0.1188, "norm_loss": 0.0, "num_token_doc": 66.9947, "num_token_overlap": 14.5539, "num_token_query": 37.0626, "num_token_union": 65.3917, "num_word_context": 202.2639, "num_word_doc": 49.9773, "num_word_query": 27.7328, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4390.8629, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.373, "queue_k_norm": 1.4947, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.0626, "sent_len_1": 66.9947, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3688, "stdk": 0.0488, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.3346, "doc_norm": 1.4966, "encoder_q-embeddings": 5858.1191, "encoder_q-layer.0": 4116.0, "encoder_q-layer.1": 4567.3145, "encoder_q-layer.10": 2418.3572, "encoder_q-layer.11": 6246.9404, "encoder_q-layer.2": 5660.3867, "encoder_q-layer.3": 5808.5317, "encoder_q-layer.4": 6267.6753, "encoder_q-layer.5": 5556.2441, "encoder_q-layer.6": 6091.4502, "encoder_q-layer.7": 6473.0088, "encoder_q-layer.8": 4769.1016, "encoder_q-layer.9": 2389.7661, "epoch": 0.91, "inbatch_neg_score": 0.3896, "inbatch_pos_score": 1.0566, "learning_rate": 3.888888888888889e-06, "loss": 3.3346, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.5509, "num_token_overlap": 14.5422, "num_token_query": 37.3206, "num_token_union": 65.2625, "num_word_context": 202.2251, "num_word_doc": 49.6654, "num_word_query": 27.9521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7984.1351, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3906, "query_norm": 1.3832, "queue_k_norm": 1.4928, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3206, "sent_len_1": 66.5509, "sent_len_max_0": 128.0, "sent_len_max_1": 189.05, "stdk": 0.049, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.3372, "doc_norm": 1.4907, "encoder_q-embeddings": 3748.6179, "encoder_q-layer.0": 2664.9348, "encoder_q-layer.1": 2665.5286, "encoder_q-layer.10": 2454.9399, "encoder_q-layer.11": 6652.2202, "encoder_q-layer.2": 3169.5703, "encoder_q-layer.3": 3367.2522, "encoder_q-layer.4": 3587.6853, "encoder_q-layer.5": 3588.0688, "encoder_q-layer.6": 3565.2476, "encoder_q-layer.7": 3962.1426, "encoder_q-layer.8": 3587.1311, "encoder_q-layer.9": 2338.3455, "epoch": 0.91, "inbatch_neg_score": 0.3914, "inbatch_pos_score": 1.0332, "learning_rate": 3.833333333333334e-06, "loss": 3.3372, "norm_diff": 0.1225, "norm_loss": 0.0, "num_token_doc": 66.6731, "num_token_overlap": 14.5921, "num_token_query": 37.3956, "num_token_union": 65.3646, "num_word_context": 202.1903, "num_word_doc": 49.782, "num_word_query": 28.0059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5628.9037, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3901, "query_norm": 1.3682, "queue_k_norm": 1.4936, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3956, "sent_len_1": 66.6731, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1025, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3644, "doc_norm": 1.4908, "encoder_q-embeddings": 2415.8386, "encoder_q-layer.0": 1629.816, "encoder_q-layer.1": 1753.662, "encoder_q-layer.10": 2240.7783, "encoder_q-layer.11": 6001.9126, "encoder_q-layer.2": 2022.5377, "encoder_q-layer.3": 2084.418, "encoder_q-layer.4": 2210.158, "encoder_q-layer.5": 2112.3992, "encoder_q-layer.6": 2320.3247, "encoder_q-layer.7": 2405.2317, "encoder_q-layer.8": 2614.3384, "encoder_q-layer.9": 2200.2273, "epoch": 0.91, "inbatch_neg_score": 0.3933, "inbatch_pos_score": 1.0654, "learning_rate": 3.777777777777778e-06, "loss": 3.3644, "norm_diff": 0.12, "norm_loss": 0.0, "num_token_doc": 66.8706, "num_token_overlap": 14.5309, "num_token_query": 37.1596, "num_token_union": 65.3616, "num_word_context": 202.105, "num_word_doc": 49.8826, "num_word_query": 27.81, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4228.5628, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3926, "query_norm": 1.3709, "queue_k_norm": 1.4947, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1596, "sent_len_1": 66.8706, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4863, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3257, "doc_norm": 1.4939, "encoder_q-embeddings": 2732.2246, "encoder_q-layer.0": 1927.709, "encoder_q-layer.1": 2274.5366, "encoder_q-layer.10": 2459.415, "encoder_q-layer.11": 6444.2163, "encoder_q-layer.2": 2793.3748, "encoder_q-layer.3": 2620.8274, "encoder_q-layer.4": 2697.0718, "encoder_q-layer.5": 2821.0632, "encoder_q-layer.6": 2642.8516, "encoder_q-layer.7": 2580.6863, "encoder_q-layer.8": 2728.9211, "encoder_q-layer.9": 2351.7859, "epoch": 0.91, "inbatch_neg_score": 0.3904, "inbatch_pos_score": 1.041, "learning_rate": 3.722222222222222e-06, "loss": 3.3257, "norm_diff": 0.1212, "norm_loss": 0.0, "num_token_doc": 66.9031, "num_token_overlap": 14.6126, "num_token_query": 37.2654, "num_token_union": 65.3951, "num_word_context": 202.1444, "num_word_doc": 49.9416, "num_word_query": 27.8741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4770.3462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3909, "query_norm": 1.3727, "queue_k_norm": 1.4939, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2654, "sent_len_1": 66.9031, "sent_len_max_0": 128.0, "sent_len_max_1": 189.205, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.3492, "doc_norm": 1.4928, "encoder_q-embeddings": 3712.9321, "encoder_q-layer.0": 2523.3096, "encoder_q-layer.1": 2817.3196, "encoder_q-layer.10": 2211.2368, "encoder_q-layer.11": 6048.5459, "encoder_q-layer.2": 3191.8418, "encoder_q-layer.3": 3363.3889, "encoder_q-layer.4": 3792.0542, "encoder_q-layer.5": 3493.3262, "encoder_q-layer.6": 3744.3367, "encoder_q-layer.7": 3091.4458, "encoder_q-layer.8": 2773.3682, "encoder_q-layer.9": 2125.5688, "epoch": 0.91, "inbatch_neg_score": 0.3927, "inbatch_pos_score": 1.042, "learning_rate": 3.666666666666667e-06, "loss": 3.3492, "norm_diff": 0.1249, "norm_loss": 0.0, "num_token_doc": 67.0162, "num_token_overlap": 14.5486, "num_token_query": 37.3006, "num_token_union": 65.5298, "num_word_context": 202.357, "num_word_doc": 50.0311, "num_word_query": 27.9393, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5377.4539, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3921, "query_norm": 1.3679, "queue_k_norm": 1.4931, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3006, "sent_len_1": 67.0162, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.3162, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3329, "doc_norm": 1.4942, "encoder_q-embeddings": 1831.4464, "encoder_q-layer.0": 1235.223, "encoder_q-layer.1": 1300.9905, "encoder_q-layer.10": 2522.0242, "encoder_q-layer.11": 6448.0176, "encoder_q-layer.2": 1519.3993, "encoder_q-layer.3": 1517.6678, "encoder_q-layer.4": 1565.5421, "encoder_q-layer.5": 1634.2322, "encoder_q-layer.6": 1862.2832, "encoder_q-layer.7": 2052.6792, "encoder_q-layer.8": 2454.6118, "encoder_q-layer.9": 2211.4592, "epoch": 0.91, "inbatch_neg_score": 0.3919, "inbatch_pos_score": 1.0479, "learning_rate": 3.611111111111111e-06, "loss": 3.3329, "norm_diff": 0.1258, "norm_loss": 0.0, "num_token_doc": 66.9114, "num_token_overlap": 14.5899, "num_token_query": 37.4084, "num_token_union": 65.5278, "num_word_context": 202.6405, "num_word_doc": 49.9414, "num_word_query": 28.0091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4046.5716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3931, "query_norm": 1.3684, "queue_k_norm": 1.4954, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4084, "sent_len_1": 66.9114, "sent_len_max_0": 127.9862, "sent_len_max_1": 190.67, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3422, "doc_norm": 1.4985, "encoder_q-embeddings": 2751.0574, "encoder_q-layer.0": 1981.6204, "encoder_q-layer.1": 2036.5742, "encoder_q-layer.10": 2480.0811, "encoder_q-layer.11": 6320.3301, "encoder_q-layer.2": 2342.7883, "encoder_q-layer.3": 2509.4143, "encoder_q-layer.4": 2787.2375, "encoder_q-layer.5": 2750.8501, "encoder_q-layer.6": 3022.7341, "encoder_q-layer.7": 3337.2546, "encoder_q-layer.8": 3132.2993, "encoder_q-layer.9": 2305.0686, "epoch": 0.91, "inbatch_neg_score": 0.3938, "inbatch_pos_score": 1.0371, "learning_rate": 3.555555555555556e-06, "loss": 3.3422, "norm_diff": 0.1273, "norm_loss": 0.0, "num_token_doc": 66.7871, "num_token_overlap": 14.5888, "num_token_query": 37.463, "num_token_union": 65.4559, "num_word_context": 202.3884, "num_word_doc": 49.7771, "num_word_query": 28.0434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4798.0681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3936, "query_norm": 1.3712, "queue_k_norm": 1.4938, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.463, "sent_len_1": 66.7871, "sent_len_max_0": 128.0, "sent_len_max_1": 192.035, "stdk": 0.0491, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3344, "doc_norm": 1.4989, "encoder_q-embeddings": 4364.8677, "encoder_q-layer.0": 3129.3945, "encoder_q-layer.1": 3537.8083, "encoder_q-layer.10": 2227.1506, "encoder_q-layer.11": 6453.5352, "encoder_q-layer.2": 4211.8022, "encoder_q-layer.3": 4793.1631, "encoder_q-layer.4": 5403.3667, "encoder_q-layer.5": 4861.1133, "encoder_q-layer.6": 4948.0586, "encoder_q-layer.7": 4455.6904, "encoder_q-layer.8": 3774.7625, "encoder_q-layer.9": 2333.0503, "epoch": 0.91, "inbatch_neg_score": 0.3925, "inbatch_pos_score": 1.0596, "learning_rate": 3.5000000000000004e-06, "loss": 3.3344, "norm_diff": 0.1263, "norm_loss": 0.0, "num_token_doc": 66.875, "num_token_overlap": 14.6319, "num_token_query": 37.4272, "num_token_union": 65.4507, "num_word_context": 202.5695, "num_word_doc": 49.9078, "num_word_query": 28.0101, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6676.5732, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.394, "query_norm": 1.3726, "queue_k_norm": 1.4917, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4272, "sent_len_1": 66.875, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.2775, "stdk": 0.0491, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3296, "doc_norm": 1.4976, "encoder_q-embeddings": 3492.7161, "encoder_q-layer.0": 2439.5112, "encoder_q-layer.1": 2763.0188, "encoder_q-layer.10": 2723.2871, "encoder_q-layer.11": 6639.4844, "encoder_q-layer.2": 3244.3296, "encoder_q-layer.3": 3256.772, "encoder_q-layer.4": 3314.572, "encoder_q-layer.5": 2954.1206, "encoder_q-layer.6": 2594.4092, "encoder_q-layer.7": 2326.0315, "encoder_q-layer.8": 2574.4583, "encoder_q-layer.9": 2317.0962, "epoch": 0.92, "inbatch_neg_score": 0.3946, "inbatch_pos_score": 1.0605, "learning_rate": 3.4444444444444444e-06, "loss": 3.3296, "norm_diff": 0.1202, "norm_loss": 0.0, "num_token_doc": 67.0818, "num_token_overlap": 14.6353, "num_token_query": 37.453, "num_token_union": 65.5828, "num_word_context": 202.8166, "num_word_doc": 50.0387, "num_word_query": 28.0668, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5169.6208, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3948, "query_norm": 1.3774, "queue_k_norm": 1.4938, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.453, "sent_len_1": 67.0818, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.755, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3613, "doc_norm": 1.4957, "encoder_q-embeddings": 2726.3848, "encoder_q-layer.0": 1906.119, "encoder_q-layer.1": 2088.4692, "encoder_q-layer.10": 2366.0103, "encoder_q-layer.11": 5999.4043, "encoder_q-layer.2": 2445.9722, "encoder_q-layer.3": 2557.252, "encoder_q-layer.4": 2643.082, "encoder_q-layer.5": 2620.9316, "encoder_q-layer.6": 2988.5332, "encoder_q-layer.7": 3039.1504, "encoder_q-layer.8": 2928.2971, "encoder_q-layer.9": 2191.4583, "epoch": 0.92, "inbatch_neg_score": 0.392, "inbatch_pos_score": 1.0518, "learning_rate": 3.3888888888888893e-06, "loss": 3.3613, "norm_diff": 0.1249, "norm_loss": 0.0, "num_token_doc": 66.7174, "num_token_overlap": 14.492, "num_token_query": 37.276, "num_token_union": 65.3797, "num_word_context": 202.4307, "num_word_doc": 49.7716, "num_word_query": 27.9147, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4574.8444, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3931, "query_norm": 1.3708, "queue_k_norm": 1.4933, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.276, "sent_len_1": 66.7174, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1637, "stdk": 0.049, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3392, "doc_norm": 1.4938, "encoder_q-embeddings": 8271.8154, "encoder_q-layer.0": 6276.1831, "encoder_q-layer.1": 7937.3555, "encoder_q-layer.10": 2585.2292, "encoder_q-layer.11": 6375.9028, "encoder_q-layer.2": 10558.4365, "encoder_q-layer.3": 10568.7959, "encoder_q-layer.4": 11763.8125, "encoder_q-layer.5": 10363.209, "encoder_q-layer.6": 8922.4326, "encoder_q-layer.7": 6994.1523, "encoder_q-layer.8": 6246.8794, "encoder_q-layer.9": 2600.4922, "epoch": 0.92, "inbatch_neg_score": 0.395, "inbatch_pos_score": 1.0703, "learning_rate": 3.3333333333333333e-06, "loss": 3.3392, "norm_diff": 0.1186, "norm_loss": 0.0, "num_token_doc": 66.7828, "num_token_overlap": 14.5899, "num_token_query": 37.3106, "num_token_union": 65.356, "num_word_context": 202.0954, "num_word_doc": 49.7928, "num_word_query": 27.9421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12797.9385, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.394, "query_norm": 1.3751, "queue_k_norm": 1.4942, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3106, "sent_len_1": 66.7828, "sent_len_max_0": 127.9762, "sent_len_max_1": 191.41, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3575, "doc_norm": 1.4898, "encoder_q-embeddings": 2513.4966, "encoder_q-layer.0": 1737.6627, "encoder_q-layer.1": 1850.1876, "encoder_q-layer.10": 2423.0757, "encoder_q-layer.11": 6555.375, "encoder_q-layer.2": 2142.3267, "encoder_q-layer.3": 2186.3464, "encoder_q-layer.4": 2313.7566, "encoder_q-layer.5": 2247.24, "encoder_q-layer.6": 2321.7361, "encoder_q-layer.7": 2434.2939, "encoder_q-layer.8": 2759.604, "encoder_q-layer.9": 2326.4155, "epoch": 0.92, "inbatch_neg_score": 0.3939, "inbatch_pos_score": 1.0391, "learning_rate": 3.277777777777778e-06, "loss": 3.3575, "norm_diff": 0.1236, "norm_loss": 0.0, "num_token_doc": 66.8929, "num_token_overlap": 14.5492, "num_token_query": 37.2795, "num_token_union": 65.4481, "num_word_context": 202.5263, "num_word_doc": 49.8985, "num_word_query": 27.9042, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4446.2464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3931, "query_norm": 1.3662, "queue_k_norm": 1.4936, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2795, "sent_len_1": 66.8929, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.2937, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.3453, "doc_norm": 1.4967, "encoder_q-embeddings": 2332.1406, "encoder_q-layer.0": 1582.4517, "encoder_q-layer.1": 1681.9644, "encoder_q-layer.10": 2640.6206, "encoder_q-layer.11": 6667.0908, "encoder_q-layer.2": 1863.7863, "encoder_q-layer.3": 1937.354, "encoder_q-layer.4": 1938.0917, "encoder_q-layer.5": 2173.6238, "encoder_q-layer.6": 2323.9692, "encoder_q-layer.7": 2400.3435, "encoder_q-layer.8": 2756.0911, "encoder_q-layer.9": 2553.875, "epoch": 0.92, "inbatch_neg_score": 0.3927, "inbatch_pos_score": 1.041, "learning_rate": 3.2222222222222222e-06, "loss": 3.3453, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.78, "num_token_overlap": 14.6554, "num_token_query": 37.4044, "num_token_union": 65.3522, "num_word_context": 202.3926, "num_word_doc": 49.8318, "num_word_query": 28.0071, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4359.7535, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3931, "query_norm": 1.3808, "queue_k_norm": 1.493, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4044, "sent_len_1": 66.78, "sent_len_max_0": 128.0, "sent_len_max_1": 189.61, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3381, "doc_norm": 1.4936, "encoder_q-embeddings": 3228.2378, "encoder_q-layer.0": 2679.771, "encoder_q-layer.1": 2659.2751, "encoder_q-layer.10": 2328.8435, "encoder_q-layer.11": 6327.73, "encoder_q-layer.2": 2958.6353, "encoder_q-layer.3": 2796.5933, "encoder_q-layer.4": 2810.1643, "encoder_q-layer.5": 2662.4307, "encoder_q-layer.6": 2600.4983, "encoder_q-layer.7": 2779.4849, "encoder_q-layer.8": 2752.5549, "encoder_q-layer.9": 2235.2583, "epoch": 0.92, "inbatch_neg_score": 0.3955, "inbatch_pos_score": 1.0771, "learning_rate": 3.166666666666667e-06, "loss": 3.3381, "norm_diff": 0.117, "norm_loss": 0.0, "num_token_doc": 66.7954, "num_token_overlap": 14.5638, "num_token_query": 37.335, "num_token_union": 65.4429, "num_word_context": 202.6353, "num_word_doc": 49.7951, "num_word_query": 27.9361, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4937.4221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.395, "query_norm": 1.3766, "queue_k_norm": 1.4936, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.335, "sent_len_1": 66.7954, "sent_len_max_0": 128.0, "sent_len_max_1": 191.17, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3395, "doc_norm": 1.4937, "encoder_q-embeddings": 2384.8555, "encoder_q-layer.0": 1616.7545, "encoder_q-layer.1": 1760.3285, "encoder_q-layer.10": 2358.5742, "encoder_q-layer.11": 6179.1748, "encoder_q-layer.2": 2076.0771, "encoder_q-layer.3": 1963.6241, "encoder_q-layer.4": 2019.7468, "encoder_q-layer.5": 2006.4036, "encoder_q-layer.6": 2346.8892, "encoder_q-layer.7": 2262.3655, "encoder_q-layer.8": 2534.6995, "encoder_q-layer.9": 2161.0837, "epoch": 0.92, "inbatch_neg_score": 0.3972, "inbatch_pos_score": 1.0518, "learning_rate": 3.111111111111111e-06, "loss": 3.3395, "norm_diff": 0.129, "norm_loss": 0.0, "num_token_doc": 66.7235, "num_token_overlap": 14.5881, "num_token_query": 37.3133, "num_token_union": 65.2982, "num_word_context": 202.1314, "num_word_doc": 49.7737, "num_word_query": 27.9373, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4226.7305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3962, "query_norm": 1.3647, "queue_k_norm": 1.4921, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3133, "sent_len_1": 66.7235, "sent_len_max_0": 128.0, "sent_len_max_1": 188.935, "stdk": 0.0489, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.3329, "doc_norm": 1.4898, "encoder_q-embeddings": 3869.0854, "encoder_q-layer.0": 2798.79, "encoder_q-layer.1": 3330.4402, "encoder_q-layer.10": 2162.4133, "encoder_q-layer.11": 6369.8584, "encoder_q-layer.2": 4134.2979, "encoder_q-layer.3": 4161.2119, "encoder_q-layer.4": 4347.6899, "encoder_q-layer.5": 4313.8149, "encoder_q-layer.6": 4081.2166, "encoder_q-layer.7": 3922.3247, "encoder_q-layer.8": 3413.9751, "encoder_q-layer.9": 2274.9375, "epoch": 0.92, "inbatch_neg_score": 0.3956, "inbatch_pos_score": 1.0469, "learning_rate": 3.0555555555555556e-06, "loss": 3.3329, "norm_diff": 0.1202, "norm_loss": 0.0, "num_token_doc": 66.9826, "num_token_overlap": 14.5842, "num_token_query": 37.3405, "num_token_union": 65.4729, "num_word_context": 202.6404, "num_word_doc": 49.9608, "num_word_query": 27.9587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6013.711, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3948, "query_norm": 1.3696, "queue_k_norm": 1.4953, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.3405, "sent_len_1": 66.9826, "sent_len_max_0": 127.99, "sent_len_max_1": 191.0938, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3293, "doc_norm": 1.493, "encoder_q-embeddings": 8005.9087, "encoder_q-layer.0": 6040.2598, "encoder_q-layer.1": 6829.8584, "encoder_q-layer.10": 4653.8242, "encoder_q-layer.11": 12523.124, "encoder_q-layer.2": 7464.915, "encoder_q-layer.3": 7603.853, "encoder_q-layer.4": 7154.3853, "encoder_q-layer.5": 6123.749, "encoder_q-layer.6": 5642.6675, "encoder_q-layer.7": 5729.4717, "encoder_q-layer.8": 5673.6626, "encoder_q-layer.9": 4604.2578, "epoch": 0.92, "inbatch_neg_score": 0.3961, "inbatch_pos_score": 1.0547, "learning_rate": 3e-06, "loss": 3.3293, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.923, "num_token_overlap": 14.6312, "num_token_query": 37.4766, "num_token_union": 65.5269, "num_word_context": 202.5549, "num_word_doc": 49.9286, "num_word_query": 28.0646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10947.2998, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3962, "query_norm": 1.3752, "queue_k_norm": 1.4947, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4766, "sent_len_1": 66.923, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.5863, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3431, "doc_norm": 1.4913, "encoder_q-embeddings": 6489.1743, "encoder_q-layer.0": 4365.2407, "encoder_q-layer.1": 4821.8779, "encoder_q-layer.10": 4853.0581, "encoder_q-layer.11": 12849.7998, "encoder_q-layer.2": 5553.0376, "encoder_q-layer.3": 5627.4312, "encoder_q-layer.4": 5873.8008, "encoder_q-layer.5": 5754.6582, "encoder_q-layer.6": 6212.4199, "encoder_q-layer.7": 6846.9351, "encoder_q-layer.8": 6618.8374, "encoder_q-layer.9": 4918.5366, "epoch": 0.92, "inbatch_neg_score": 0.3934, "inbatch_pos_score": 1.0439, "learning_rate": 2.9444444444444445e-06, "loss": 3.3431, "norm_diff": 0.1195, "norm_loss": 0.0, "num_token_doc": 66.8346, "num_token_overlap": 14.6081, "num_token_query": 37.4597, "num_token_union": 65.5009, "num_word_context": 202.2519, "num_word_doc": 49.8721, "num_word_query": 28.0526, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10211.1012, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3938, "query_norm": 1.3718, "queue_k_norm": 1.4942, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4597, "sent_len_1": 66.8346, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8288, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3299, "doc_norm": 1.4955, "encoder_q-embeddings": 3171.9033, "encoder_q-layer.0": 2152.7009, "encoder_q-layer.1": 2415.1484, "encoder_q-layer.10": 2647.2104, "encoder_q-layer.11": 6602.5029, "encoder_q-layer.2": 2797.6792, "encoder_q-layer.3": 2705.375, "encoder_q-layer.4": 2723.1055, "encoder_q-layer.5": 2687.6323, "encoder_q-layer.6": 2874.7961, "encoder_q-layer.7": 3124.252, "encoder_q-layer.8": 3311.7566, "encoder_q-layer.9": 2438.3433, "epoch": 0.93, "inbatch_neg_score": 0.3962, "inbatch_pos_score": 1.0547, "learning_rate": 2.888888888888889e-06, "loss": 3.3299, "norm_diff": 0.1223, "norm_loss": 0.0, "num_token_doc": 66.6479, "num_token_overlap": 14.5684, "num_token_query": 37.4604, "num_token_union": 65.3879, "num_word_context": 202.2698, "num_word_doc": 49.7322, "num_word_query": 28.0363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5031.7515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3965, "query_norm": 1.3731, "queue_k_norm": 1.4946, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4604, "sent_len_1": 66.6479, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.635, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.3615, "doc_norm": 1.4948, "encoder_q-embeddings": 2830.6821, "encoder_q-layer.0": 2005.4272, "encoder_q-layer.1": 2142.7634, "encoder_q-layer.10": 2758.1458, "encoder_q-layer.11": 6617.6099, "encoder_q-layer.2": 2550.2791, "encoder_q-layer.3": 2756.3621, "encoder_q-layer.4": 3192.2332, "encoder_q-layer.5": 2739.5933, "encoder_q-layer.6": 2543.6621, "encoder_q-layer.7": 2468.7812, "encoder_q-layer.8": 2866.7925, "encoder_q-layer.9": 2433.5608, "epoch": 0.93, "inbatch_neg_score": 0.3966, "inbatch_pos_score": 1.0342, "learning_rate": 2.8333333333333335e-06, "loss": 3.3615, "norm_diff": 0.1283, "norm_loss": 0.0, "num_token_doc": 66.6161, "num_token_overlap": 14.5312, "num_token_query": 37.2614, "num_token_union": 65.2962, "num_word_context": 202.3672, "num_word_doc": 49.6988, "num_word_query": 27.8929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4858.4735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3965, "query_norm": 1.3665, "queue_k_norm": 1.4945, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2614, "sent_len_1": 66.6161, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.0838, "stdk": 0.0489, "stdq": 0.0439, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3425, "doc_norm": 1.4958, "encoder_q-embeddings": 1916.25, "encoder_q-layer.0": 1274.4065, "encoder_q-layer.1": 1330.8922, "encoder_q-layer.10": 2378.5854, "encoder_q-layer.11": 6606.1914, "encoder_q-layer.2": 1473.6208, "encoder_q-layer.3": 1490.4093, "encoder_q-layer.4": 1581.7889, "encoder_q-layer.5": 1611.697, "encoder_q-layer.6": 1895.6091, "encoder_q-layer.7": 2063.8989, "encoder_q-layer.8": 2452.8391, "encoder_q-layer.9": 2243.6509, "epoch": 0.93, "inbatch_neg_score": 0.3996, "inbatch_pos_score": 1.0498, "learning_rate": 2.777777777777778e-06, "loss": 3.3425, "norm_diff": 0.1268, "norm_loss": 0.0, "num_token_doc": 66.8786, "num_token_overlap": 14.5662, "num_token_query": 37.3901, "num_token_union": 65.5145, "num_word_context": 202.5911, "num_word_doc": 49.8696, "num_word_query": 27.9974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4134.8617, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3984, "query_norm": 1.369, "queue_k_norm": 1.4953, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3901, "sent_len_1": 66.8786, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.55, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3535, "doc_norm": 1.4934, "encoder_q-embeddings": 2138.8831, "encoder_q-layer.0": 1421.2839, "encoder_q-layer.1": 1510.2175, "encoder_q-layer.10": 2546.4883, "encoder_q-layer.11": 6593.729, "encoder_q-layer.2": 1713.0352, "encoder_q-layer.3": 1826.0858, "encoder_q-layer.4": 2025.7455, "encoder_q-layer.5": 2094.0371, "encoder_q-layer.6": 2263.7859, "encoder_q-layer.7": 2287.0376, "encoder_q-layer.8": 2599.8459, "encoder_q-layer.9": 2241.6255, "epoch": 0.93, "inbatch_neg_score": 0.3974, "inbatch_pos_score": 1.0283, "learning_rate": 2.7222222222222224e-06, "loss": 3.3535, "norm_diff": 0.125, "norm_loss": 0.0, "num_token_doc": 66.8486, "num_token_overlap": 14.4699, "num_token_query": 37.0954, "num_token_union": 65.3297, "num_word_context": 202.8286, "num_word_doc": 49.8734, "num_word_query": 27.7439, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4257.7934, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.397, "query_norm": 1.3684, "queue_k_norm": 1.4944, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.0954, "sent_len_1": 66.8486, "sent_len_max_0": 128.0, "sent_len_max_1": 192.025, "stdk": 0.0488, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3443, "doc_norm": 1.4923, "encoder_q-embeddings": 2179.1804, "encoder_q-layer.0": 1467.5238, "encoder_q-layer.1": 1497.9993, "encoder_q-layer.10": 2447.5071, "encoder_q-layer.11": 6642.2373, "encoder_q-layer.2": 1744.8218, "encoder_q-layer.3": 1794.9163, "encoder_q-layer.4": 1870.0742, "encoder_q-layer.5": 1895.3766, "encoder_q-layer.6": 2154.0781, "encoder_q-layer.7": 2351.4929, "encoder_q-layer.8": 2801.6094, "encoder_q-layer.9": 2425.7312, "epoch": 0.93, "inbatch_neg_score": 0.3978, "inbatch_pos_score": 1.0537, "learning_rate": 2.666666666666667e-06, "loss": 3.3443, "norm_diff": 0.1257, "norm_loss": 0.0, "num_token_doc": 66.4927, "num_token_overlap": 14.5223, "num_token_query": 37.2518, "num_token_union": 65.2374, "num_word_context": 202.2082, "num_word_doc": 49.6483, "num_word_query": 27.8894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4358.9355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3972, "query_norm": 1.3666, "queue_k_norm": 1.4927, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2518, "sent_len_1": 66.4927, "sent_len_max_0": 127.995, "sent_len_max_1": 188.4412, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.337, "doc_norm": 1.4975, "encoder_q-embeddings": 13345.9609, "encoder_q-layer.0": 9153.0752, "encoder_q-layer.1": 10408.4805, "encoder_q-layer.10": 2260.4812, "encoder_q-layer.11": 6007.9761, "encoder_q-layer.2": 12452.0654, "encoder_q-layer.3": 13456.8848, "encoder_q-layer.4": 16058.0254, "encoder_q-layer.5": 15190.0557, "encoder_q-layer.6": 14120.9395, "encoder_q-layer.7": 15731.7393, "encoder_q-layer.8": 9535.7266, "encoder_q-layer.9": 2881.1626, "epoch": 0.93, "inbatch_neg_score": 0.3967, "inbatch_pos_score": 1.0566, "learning_rate": 2.6111111111111113e-06, "loss": 3.337, "norm_diff": 0.1212, "norm_loss": 0.0, "num_token_doc": 66.7514, "num_token_overlap": 14.5565, "num_token_query": 37.1309, "num_token_union": 65.2885, "num_word_context": 202.3179, "num_word_doc": 49.8481, "num_word_query": 27.8181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17543.8165, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3967, "query_norm": 1.3763, "queue_k_norm": 1.4958, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1309, "sent_len_1": 66.7514, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.6213, "stdk": 0.049, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.355, "doc_norm": 1.4974, "encoder_q-embeddings": 8141.2539, "encoder_q-layer.0": 5843.5708, "encoder_q-layer.1": 7077.5698, "encoder_q-layer.10": 2465.7385, "encoder_q-layer.11": 6582.3481, "encoder_q-layer.2": 8210.2725, "encoder_q-layer.3": 8101.4482, "encoder_q-layer.4": 8653.0, "encoder_q-layer.5": 8385.71, "encoder_q-layer.6": 9161.3701, "encoder_q-layer.7": 6099.1665, "encoder_q-layer.8": 4939.6401, "encoder_q-layer.9": 2602.7932, "epoch": 0.93, "inbatch_neg_score": 0.3988, "inbatch_pos_score": 1.0361, "learning_rate": 2.5555555555555557e-06, "loss": 3.355, "norm_diff": 0.1259, "norm_loss": 0.0, "num_token_doc": 66.7324, "num_token_overlap": 14.5832, "num_token_query": 37.2147, "num_token_union": 65.3157, "num_word_context": 202.3577, "num_word_doc": 49.7922, "num_word_query": 27.8371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10693.0188, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3982, "query_norm": 1.3715, "queue_k_norm": 1.4952, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2147, "sent_len_1": 66.7324, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2337, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3367, "doc_norm": 1.4935, "encoder_q-embeddings": 1319.8873, "encoder_q-layer.0": 923.4076, "encoder_q-layer.1": 981.6104, "encoder_q-layer.10": 1192.5516, "encoder_q-layer.11": 3160.6819, "encoder_q-layer.2": 1131.554, "encoder_q-layer.3": 1132.5901, "encoder_q-layer.4": 1206.9576, "encoder_q-layer.5": 1061.1517, "encoder_q-layer.6": 1101.6356, "encoder_q-layer.7": 1209.3783, "encoder_q-layer.8": 1261.1674, "encoder_q-layer.9": 1093.8436, "epoch": 0.93, "inbatch_neg_score": 0.3964, "inbatch_pos_score": 1.0391, "learning_rate": 2.5e-06, "loss": 3.3367, "norm_diff": 0.1262, "norm_loss": 0.0, "num_token_doc": 66.8786, "num_token_overlap": 14.632, "num_token_query": 37.3172, "num_token_union": 65.3974, "num_word_context": 202.2562, "num_word_doc": 49.904, "num_word_query": 27.9337, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2206.3118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.397, "query_norm": 1.3673, "queue_k_norm": 1.4957, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3172, "sent_len_1": 66.8786, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8325, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.3392, "doc_norm": 1.4958, "encoder_q-embeddings": 2913.6877, "encoder_q-layer.0": 2122.9651, "encoder_q-layer.1": 2187.0615, "encoder_q-layer.10": 1189.2731, "encoder_q-layer.11": 3290.9014, "encoder_q-layer.2": 2559.9895, "encoder_q-layer.3": 2802.0586, "encoder_q-layer.4": 3102.3845, "encoder_q-layer.5": 2766.2837, "encoder_q-layer.6": 2515.5425, "encoder_q-layer.7": 2481.5098, "encoder_q-layer.8": 1983.9749, "encoder_q-layer.9": 1254.676, "epoch": 0.93, "inbatch_neg_score": 0.3991, "inbatch_pos_score": 1.0732, "learning_rate": 2.4444444444444447e-06, "loss": 3.3392, "norm_diff": 0.1235, "norm_loss": 0.0, "num_token_doc": 66.8937, "num_token_overlap": 14.652, "num_token_query": 37.3589, "num_token_union": 65.3969, "num_word_context": 202.4015, "num_word_doc": 49.9572, "num_word_query": 27.9795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3874.4707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3989, "query_norm": 1.3722, "queue_k_norm": 1.4926, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3589, "sent_len_1": 66.8937, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7925, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3318, "doc_norm": 1.4889, "encoder_q-embeddings": 4458.4893, "encoder_q-layer.0": 3002.4487, "encoder_q-layer.1": 3401.7656, "encoder_q-layer.10": 1163.8705, "encoder_q-layer.11": 3242.0486, "encoder_q-layer.2": 3938.8264, "encoder_q-layer.3": 4292.0073, "encoder_q-layer.4": 4687.041, "encoder_q-layer.5": 4539.6182, "encoder_q-layer.6": 3793.6416, "encoder_q-layer.7": 2558.8098, "encoder_q-layer.8": 2030.2799, "encoder_q-layer.9": 1243.3646, "epoch": 0.93, "inbatch_neg_score": 0.3984, "inbatch_pos_score": 1.0654, "learning_rate": 2.388888888888889e-06, "loss": 3.3318, "norm_diff": 0.1144, "norm_loss": 0.0, "num_token_doc": 67.1194, "num_token_overlap": 14.6674, "num_token_query": 37.4117, "num_token_union": 65.6216, "num_word_context": 202.6225, "num_word_doc": 50.0826, "num_word_query": 28.0241, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5287.6909, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3982, "query_norm": 1.3745, "queue_k_norm": 1.495, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4117, "sent_len_1": 67.1194, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5925, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3187, "doc_norm": 1.4887, "encoder_q-embeddings": 1343.811, "encoder_q-layer.0": 928.8859, "encoder_q-layer.1": 991.2783, "encoder_q-layer.10": 1286.3071, "encoder_q-layer.11": 3274.3652, "encoder_q-layer.2": 1153.7325, "encoder_q-layer.3": 1170.5962, "encoder_q-layer.4": 1286.8562, "encoder_q-layer.5": 1240.7126, "encoder_q-layer.6": 1418.4905, "encoder_q-layer.7": 1531.4417, "encoder_q-layer.8": 1699.933, "encoder_q-layer.9": 1185.3844, "epoch": 0.94, "inbatch_neg_score": 0.3985, "inbatch_pos_score": 1.04, "learning_rate": 2.3333333333333336e-06, "loss": 3.3187, "norm_diff": 0.1256, "norm_loss": 0.0, "num_token_doc": 66.9962, "num_token_overlap": 14.692, "num_token_query": 37.5742, "num_token_union": 65.5341, "num_word_context": 202.6777, "num_word_doc": 49.9842, "num_word_query": 28.1075, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2364.7986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3987, "query_norm": 1.3631, "queue_k_norm": 1.4939, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5742, "sent_len_1": 66.9962, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.6387, "stdk": 0.0486, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3255, "doc_norm": 1.4937, "encoder_q-embeddings": 2924.3945, "encoder_q-layer.0": 1957.6122, "encoder_q-layer.1": 2198.7483, "encoder_q-layer.10": 1244.6207, "encoder_q-layer.11": 3301.9475, "encoder_q-layer.2": 2684.2861, "encoder_q-layer.3": 2873.4558, "encoder_q-layer.4": 2904.5422, "encoder_q-layer.5": 2991.135, "encoder_q-layer.6": 2854.9978, "encoder_q-layer.7": 2823.5862, "encoder_q-layer.8": 2263.9832, "encoder_q-layer.9": 1259.5911, "epoch": 0.94, "inbatch_neg_score": 0.3991, "inbatch_pos_score": 1.0498, "learning_rate": 2.277777777777778e-06, "loss": 3.3255, "norm_diff": 0.1246, "norm_loss": 0.0, "num_token_doc": 66.6926, "num_token_overlap": 14.6445, "num_token_query": 37.3719, "num_token_union": 65.2956, "num_word_context": 202.0095, "num_word_doc": 49.7601, "num_word_query": 27.9686, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3931.0482, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3992, "query_norm": 1.3691, "queue_k_norm": 1.4954, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3719, "sent_len_1": 66.6926, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7075, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3287, "doc_norm": 1.4998, "encoder_q-embeddings": 1147.2795, "encoder_q-layer.0": 825.2346, "encoder_q-layer.1": 895.4204, "encoder_q-layer.10": 1142.7825, "encoder_q-layer.11": 3096.0212, "encoder_q-layer.2": 1076.1652, "encoder_q-layer.3": 1080.627, "encoder_q-layer.4": 1150.3694, "encoder_q-layer.5": 1301.5364, "encoder_q-layer.6": 1394.7115, "encoder_q-layer.7": 1536.4021, "encoder_q-layer.8": 1490.9484, "encoder_q-layer.9": 1155.4954, "epoch": 0.94, "inbatch_neg_score": 0.3985, "inbatch_pos_score": 1.0371, "learning_rate": 2.2222222222222225e-06, "loss": 3.3287, "norm_diff": 0.13, "norm_loss": 0.0, "num_token_doc": 66.6231, "num_token_overlap": 14.65, "num_token_query": 37.4934, "num_token_union": 65.4018, "num_word_context": 202.2939, "num_word_doc": 49.7119, "num_word_query": 28.0821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2234.3567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3992, "query_norm": 1.3698, "queue_k_norm": 1.4947, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4934, "sent_len_1": 66.6231, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.43, "stdk": 0.0491, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3609, "doc_norm": 1.4917, "encoder_q-embeddings": 593.0819, "encoder_q-layer.0": 417.8026, "encoder_q-layer.1": 464.1107, "encoder_q-layer.10": 683.7919, "encoder_q-layer.11": 1669.2516, "encoder_q-layer.2": 570.0145, "encoder_q-layer.3": 606.0269, "encoder_q-layer.4": 655.1474, "encoder_q-layer.5": 605.2081, "encoder_q-layer.6": 685.904, "encoder_q-layer.7": 745.1898, "encoder_q-layer.8": 697.9968, "encoder_q-layer.9": 556.0808, "epoch": 0.94, "inbatch_neg_score": 0.4012, "inbatch_pos_score": 1.0596, "learning_rate": 2.166666666666667e-06, "loss": 3.3609, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.7547, "num_token_overlap": 14.639, "num_token_query": 37.435, "num_token_union": 65.4086, "num_word_context": 202.6943, "num_word_doc": 49.8611, "num_word_query": 28.0509, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1173.891, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4009, "query_norm": 1.374, "queue_k_norm": 1.4936, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.435, "sent_len_1": 66.7547, "sent_len_max_0": 127.995, "sent_len_max_1": 191.4688, "stdk": 0.0487, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3384, "doc_norm": 1.488, "encoder_q-embeddings": 1043.7604, "encoder_q-layer.0": 766.3457, "encoder_q-layer.1": 866.1985, "encoder_q-layer.10": 559.4533, "encoder_q-layer.11": 1581.972, "encoder_q-layer.2": 1050.1339, "encoder_q-layer.3": 1237.2085, "encoder_q-layer.4": 1364.6263, "encoder_q-layer.5": 1285.2426, "encoder_q-layer.6": 1287.9955, "encoder_q-layer.7": 1203.2935, "encoder_q-layer.8": 909.3314, "encoder_q-layer.9": 563.8073, "epoch": 0.94, "inbatch_neg_score": 0.4017, "inbatch_pos_score": 1.0469, "learning_rate": 2.1111111111111114e-06, "loss": 3.3384, "norm_diff": 0.113, "norm_loss": 0.0, "num_token_doc": 66.7457, "num_token_overlap": 14.6621, "num_token_query": 37.4606, "num_token_union": 65.4167, "num_word_context": 202.1405, "num_word_doc": 49.8083, "num_word_query": 28.0457, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1655.0024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4014, "query_norm": 1.3749, "queue_k_norm": 1.4933, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4606, "sent_len_1": 66.7457, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.1012, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3425, "doc_norm": 1.4941, "encoder_q-embeddings": 572.4509, "encoder_q-layer.0": 403.0139, "encoder_q-layer.1": 442.6176, "encoder_q-layer.10": 635.4057, "encoder_q-layer.11": 1615.2529, "encoder_q-layer.2": 525.5254, "encoder_q-layer.3": 563.0804, "encoder_q-layer.4": 568.3621, "encoder_q-layer.5": 610.5298, "encoder_q-layer.6": 672.6393, "encoder_q-layer.7": 713.192, "encoder_q-layer.8": 774.077, "encoder_q-layer.9": 584.5566, "epoch": 0.94, "inbatch_neg_score": 0.4016, "inbatch_pos_score": 1.0596, "learning_rate": 2.055555555555556e-06, "loss": 3.3425, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 67.0125, "num_token_overlap": 14.6208, "num_token_query": 37.3206, "num_token_union": 65.5231, "num_word_context": 202.5757, "num_word_doc": 50.0175, "num_word_query": 27.9272, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1124.9793, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4004, "query_norm": 1.3732, "queue_k_norm": 1.4951, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3206, "sent_len_1": 67.0125, "sent_len_max_0": 127.995, "sent_len_max_1": 191.1687, "stdk": 0.0488, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.3552, "doc_norm": 1.4968, "encoder_q-embeddings": 801.7446, "encoder_q-layer.0": 577.6819, "encoder_q-layer.1": 619.4145, "encoder_q-layer.10": 582.6213, "encoder_q-layer.11": 1612.5193, "encoder_q-layer.2": 720.3967, "encoder_q-layer.3": 723.2678, "encoder_q-layer.4": 806.9692, "encoder_q-layer.5": 815.0582, "encoder_q-layer.6": 839.2823, "encoder_q-layer.7": 883.7908, "encoder_q-layer.8": 833.1467, "encoder_q-layer.9": 575.157, "epoch": 0.94, "inbatch_neg_score": 0.4014, "inbatch_pos_score": 1.0527, "learning_rate": 2.0000000000000003e-06, "loss": 3.3552, "norm_diff": 0.1373, "norm_loss": 0.0, "num_token_doc": 66.7268, "num_token_overlap": 14.5878, "num_token_query": 37.3132, "num_token_union": 65.3509, "num_word_context": 202.3093, "num_word_doc": 49.7882, "num_word_query": 27.9458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1303.1154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4004, "query_norm": 1.3596, "queue_k_norm": 1.494, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3132, "sent_len_1": 66.7268, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5188, "stdk": 0.0489, "stdq": 0.0436, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.3374, "doc_norm": 1.4958, "encoder_q-embeddings": 621.836, "encoder_q-layer.0": 424.4529, "encoder_q-layer.1": 443.8582, "encoder_q-layer.10": 583.1312, "encoder_q-layer.11": 1639.1191, "encoder_q-layer.2": 521.3176, "encoder_q-layer.3": 551.8144, "encoder_q-layer.4": 584.0352, "encoder_q-layer.5": 606.6898, "encoder_q-layer.6": 654.1898, "encoder_q-layer.7": 711.4464, "encoder_q-layer.8": 702.8655, "encoder_q-layer.9": 584.6453, "epoch": 0.94, "inbatch_neg_score": 0.4013, "inbatch_pos_score": 1.0254, "learning_rate": 1.9444444444444444e-06, "loss": 3.3374, "norm_diff": 0.1306, "norm_loss": 0.0, "num_token_doc": 66.9869, "num_token_overlap": 14.5984, "num_token_query": 37.1329, "num_token_union": 65.3732, "num_word_context": 202.3131, "num_word_doc": 49.9789, "num_word_query": 27.7822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1142.7663, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4016, "query_norm": 1.3652, "queue_k_norm": 1.4936, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.1329, "sent_len_1": 66.9869, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7475, "stdk": 0.0489, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3346, "doc_norm": 1.4939, "encoder_q-embeddings": 791.563, "encoder_q-layer.0": 524.4362, "encoder_q-layer.1": 597.6125, "encoder_q-layer.10": 589.527, "encoder_q-layer.11": 1589.0797, "encoder_q-layer.2": 704.3204, "encoder_q-layer.3": 738.9313, "encoder_q-layer.4": 837.0424, "encoder_q-layer.5": 868.8356, "encoder_q-layer.6": 956.9211, "encoder_q-layer.7": 1324.7479, "encoder_q-layer.8": 1081.196, "encoder_q-layer.9": 605.7189, "epoch": 0.94, "inbatch_neg_score": 0.4006, "inbatch_pos_score": 1.0439, "learning_rate": 1.888888888888889e-06, "loss": 3.3346, "norm_diff": 0.1274, "norm_loss": 0.0, "num_token_doc": 66.753, "num_token_overlap": 14.5928, "num_token_query": 37.4162, "num_token_union": 65.3862, "num_word_context": 202.3997, "num_word_doc": 49.8095, "num_word_query": 27.9983, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1391.011, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4014, "query_norm": 1.3665, "queue_k_norm": 1.4955, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.4162, "sent_len_1": 66.753, "sent_len_max_0": 128.0, "sent_len_max_1": 190.98, "stdk": 0.0488, "stdq": 0.0439, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3541, "doc_norm": 1.4901, "encoder_q-embeddings": 925.5863, "encoder_q-layer.0": 679.9038, "encoder_q-layer.1": 766.1054, "encoder_q-layer.10": 564.0168, "encoder_q-layer.11": 1552.7551, "encoder_q-layer.2": 913.7759, "encoder_q-layer.3": 881.5306, "encoder_q-layer.4": 890.3572, "encoder_q-layer.5": 934.9893, "encoder_q-layer.6": 954.3895, "encoder_q-layer.7": 872.7389, "encoder_q-layer.8": 831.8362, "encoder_q-layer.9": 578.2498, "epoch": 0.94, "inbatch_neg_score": 0.4015, "inbatch_pos_score": 1.0498, "learning_rate": 1.8333333333333335e-06, "loss": 3.3541, "norm_diff": 0.1253, "norm_loss": 0.0, "num_token_doc": 66.8399, "num_token_overlap": 14.6212, "num_token_query": 37.3519, "num_token_union": 65.3837, "num_word_context": 202.4025, "num_word_doc": 49.9022, "num_word_query": 27.9848, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1391.1661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4014, "query_norm": 1.3648, "queue_k_norm": 1.4937, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3519, "sent_len_1": 66.8399, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2237, "stdk": 0.0487, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.3285, "doc_norm": 1.4952, "encoder_q-embeddings": 701.1517, "encoder_q-layer.0": 483.7361, "encoder_q-layer.1": 547.8397, "encoder_q-layer.10": 642.8046, "encoder_q-layer.11": 1693.1903, "encoder_q-layer.2": 632.0031, "encoder_q-layer.3": 616.4609, "encoder_q-layer.4": 653.8942, "encoder_q-layer.5": 626.5407, "encoder_q-layer.6": 648.8832, "encoder_q-layer.7": 731.624, "encoder_q-layer.8": 738.5723, "encoder_q-layer.9": 606.3811, "epoch": 0.95, "inbatch_neg_score": 0.4, "inbatch_pos_score": 1.043, "learning_rate": 1.777777777777778e-06, "loss": 3.3285, "norm_diff": 0.1233, "norm_loss": 0.0, "num_token_doc": 66.795, "num_token_overlap": 14.6037, "num_token_query": 37.4212, "num_token_union": 65.4755, "num_word_context": 202.0967, "num_word_doc": 49.7982, "num_word_query": 28.0069, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1216.0539, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4001, "query_norm": 1.3719, "queue_k_norm": 1.4959, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4212, "sent_len_1": 66.795, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.365, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.3453, "doc_norm": 1.5038, "encoder_q-embeddings": 846.2352, "encoder_q-layer.0": 581.8663, "encoder_q-layer.1": 579.622, "encoder_q-layer.10": 581.7916, "encoder_q-layer.11": 1604.4707, "encoder_q-layer.2": 688.4261, "encoder_q-layer.3": 695.4979, "encoder_q-layer.4": 692.2062, "encoder_q-layer.5": 676.129, "encoder_q-layer.6": 741.6346, "encoder_q-layer.7": 776.4849, "encoder_q-layer.8": 782.1567, "encoder_q-layer.9": 582.2936, "epoch": 0.95, "inbatch_neg_score": 0.3987, "inbatch_pos_score": 1.042, "learning_rate": 1.7222222222222222e-06, "loss": 3.3453, "norm_diff": 0.1402, "norm_loss": 0.0, "num_token_doc": 66.6287, "num_token_overlap": 14.5938, "num_token_query": 37.483, "num_token_union": 65.3597, "num_word_context": 202.4702, "num_word_doc": 49.7417, "num_word_query": 28.0857, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1263.6866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3999, "query_norm": 1.3636, "queue_k_norm": 1.4932, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.483, "sent_len_1": 66.6287, "sent_len_max_0": 127.99, "sent_len_max_1": 187.2475, "stdk": 0.0492, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.3451, "doc_norm": 1.5012, "encoder_q-embeddings": 615.4327, "encoder_q-layer.0": 417.748, "encoder_q-layer.1": 446.8425, "encoder_q-layer.10": 641.6625, "encoder_q-layer.11": 1657.4064, "encoder_q-layer.2": 506.4011, "encoder_q-layer.3": 527.5673, "encoder_q-layer.4": 576.1086, "encoder_q-layer.5": 582.6424, "encoder_q-layer.6": 629.7181, "encoder_q-layer.7": 685.6899, "encoder_q-layer.8": 695.9841, "encoder_q-layer.9": 558.8677, "epoch": 0.95, "inbatch_neg_score": 0.4024, "inbatch_pos_score": 1.0488, "learning_rate": 1.6666666666666667e-06, "loss": 3.3451, "norm_diff": 0.1315, "norm_loss": 0.0, "num_token_doc": 66.6912, "num_token_overlap": 14.5829, "num_token_query": 37.2364, "num_token_union": 65.231, "num_word_context": 202.0206, "num_word_doc": 49.7373, "num_word_query": 27.889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1117.8386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 1.3697, "queue_k_norm": 1.4952, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2364, "sent_len_1": 66.6912, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6838, "stdk": 0.0491, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3392, "doc_norm": 1.4977, "encoder_q-embeddings": 624.5046, "encoder_q-layer.0": 444.7695, "encoder_q-layer.1": 470.0057, "encoder_q-layer.10": 572.3995, "encoder_q-layer.11": 1592.4653, "encoder_q-layer.2": 528.8608, "encoder_q-layer.3": 532.4838, "encoder_q-layer.4": 561.7892, "encoder_q-layer.5": 563.2413, "encoder_q-layer.6": 553.6013, "encoder_q-layer.7": 596.7591, "encoder_q-layer.8": 688.6688, "encoder_q-layer.9": 554.4673, "epoch": 0.95, "inbatch_neg_score": 0.4021, "inbatch_pos_score": 1.0459, "learning_rate": 1.6111111111111111e-06, "loss": 3.3392, "norm_diff": 0.1333, "norm_loss": 0.0, "num_token_doc": 66.7925, "num_token_overlap": 14.5169, "num_token_query": 37.165, "num_token_union": 65.3228, "num_word_context": 202.2752, "num_word_doc": 49.8256, "num_word_query": 27.8057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1112.1058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4014, "query_norm": 1.3644, "queue_k_norm": 1.4965, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.165, "sent_len_1": 66.7925, "sent_len_max_0": 127.99, "sent_len_max_1": 189.6813, "stdk": 0.0489, "stdq": 0.0438, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3355, "doc_norm": 1.4972, "encoder_q-embeddings": 597.1874, "encoder_q-layer.0": 399.4215, "encoder_q-layer.1": 440.3271, "encoder_q-layer.10": 568.8366, "encoder_q-layer.11": 1522.0052, "encoder_q-layer.2": 486.3489, "encoder_q-layer.3": 490.2715, "encoder_q-layer.4": 544.6758, "encoder_q-layer.5": 534.1806, "encoder_q-layer.6": 565.9107, "encoder_q-layer.7": 589.2546, "encoder_q-layer.8": 630.0715, "encoder_q-layer.9": 546.3216, "epoch": 0.95, "inbatch_neg_score": 0.4009, "inbatch_pos_score": 1.0684, "learning_rate": 1.5555555555555556e-06, "loss": 3.3355, "norm_diff": 0.1139, "norm_loss": 0.0, "num_token_doc": 66.8777, "num_token_overlap": 14.5514, "num_token_query": 37.1608, "num_token_union": 65.3499, "num_word_context": 202.5007, "num_word_doc": 49.8867, "num_word_query": 27.808, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1057.3423, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 1.3832, "queue_k_norm": 1.4962, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.1608, "sent_len_1": 66.8777, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.2775, "stdk": 0.049, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.3393, "doc_norm": 1.4885, "encoder_q-embeddings": 522.1625, "encoder_q-layer.0": 355.3219, "encoder_q-layer.1": 369.5736, "encoder_q-layer.10": 650.2685, "encoder_q-layer.11": 1672.8151, "encoder_q-layer.2": 420.4061, "encoder_q-layer.3": 418.2133, "encoder_q-layer.4": 457.2667, "encoder_q-layer.5": 484.7023, "encoder_q-layer.6": 534.7422, "encoder_q-layer.7": 581.0636, "encoder_q-layer.8": 697.2042, "encoder_q-layer.9": 608.038, "epoch": 0.95, "inbatch_neg_score": 0.4029, "inbatch_pos_score": 1.0352, "learning_rate": 1.5e-06, "loss": 3.3393, "norm_diff": 0.1225, "norm_loss": 0.0, "num_token_doc": 66.9914, "num_token_overlap": 14.6605, "num_token_query": 37.4624, "num_token_union": 65.5741, "num_word_context": 202.5137, "num_word_doc": 50.0271, "num_word_query": 28.0499, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1065.9747, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4031, "query_norm": 1.366, "queue_k_norm": 1.4957, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4624, "sent_len_1": 66.9914, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6287, "stdk": 0.0486, "stdq": 0.0439, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3468, "doc_norm": 1.4954, "encoder_q-embeddings": 512.7881, "encoder_q-layer.0": 334.6897, "encoder_q-layer.1": 361.6068, "encoder_q-layer.10": 653.2557, "encoder_q-layer.11": 1646.5361, "encoder_q-layer.2": 406.11, "encoder_q-layer.3": 415.1571, "encoder_q-layer.4": 437.871, "encoder_q-layer.5": 438.6797, "encoder_q-layer.6": 488.4822, "encoder_q-layer.7": 551.0865, "encoder_q-layer.8": 660.9808, "encoder_q-layer.9": 614.9803, "epoch": 0.95, "inbatch_neg_score": 0.4031, "inbatch_pos_score": 1.0605, "learning_rate": 1.4444444444444445e-06, "loss": 3.3468, "norm_diff": 0.1171, "norm_loss": 0.0, "num_token_doc": 66.6958, "num_token_overlap": 14.57, "num_token_query": 37.1392, "num_token_union": 65.2667, "num_word_context": 202.3066, "num_word_doc": 49.7407, "num_word_query": 27.7775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1046.1987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4028, "query_norm": 1.3784, "queue_k_norm": 1.4963, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1392, "sent_len_1": 66.6958, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0362, "stdk": 0.0489, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.347, "doc_norm": 1.4995, "encoder_q-embeddings": 871.2956, "encoder_q-layer.0": 586.9249, "encoder_q-layer.1": 628.4708, "encoder_q-layer.10": 649.577, "encoder_q-layer.11": 1559.5266, "encoder_q-layer.2": 735.9142, "encoder_q-layer.3": 755.7482, "encoder_q-layer.4": 760.136, "encoder_q-layer.5": 756.3331, "encoder_q-layer.6": 795.0455, "encoder_q-layer.7": 842.0164, "encoder_q-layer.8": 792.6053, "encoder_q-layer.9": 602.7941, "epoch": 0.95, "inbatch_neg_score": 0.4017, "inbatch_pos_score": 1.0547, "learning_rate": 1.388888888888889e-06, "loss": 3.347, "norm_diff": 0.1241, "norm_loss": 0.0, "num_token_doc": 66.7768, "num_token_overlap": 14.5131, "num_token_query": 37.1753, "num_token_union": 65.3631, "num_word_context": 202.4579, "num_word_doc": 49.8196, "num_word_query": 27.8117, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1290.4569, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4011, "query_norm": 1.3755, "queue_k_norm": 1.4957, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.1753, "sent_len_1": 66.7768, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.92, "stdk": 0.049, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3266, "doc_norm": 1.4933, "encoder_q-embeddings": 767.4614, "encoder_q-layer.0": 539.8104, "encoder_q-layer.1": 601.0404, "encoder_q-layer.10": 618.171, "encoder_q-layer.11": 1625.6475, "encoder_q-layer.2": 761.228, "encoder_q-layer.3": 777.8221, "encoder_q-layer.4": 848.9823, "encoder_q-layer.5": 872.3838, "encoder_q-layer.6": 848.0796, "encoder_q-layer.7": 790.7141, "encoder_q-layer.8": 765.5726, "encoder_q-layer.9": 602.6504, "epoch": 0.95, "inbatch_neg_score": 0.4024, "inbatch_pos_score": 1.0332, "learning_rate": 1.3333333333333334e-06, "loss": 3.3266, "norm_diff": 0.1239, "norm_loss": 0.0, "num_token_doc": 66.6037, "num_token_overlap": 14.6122, "num_token_query": 37.5644, "num_token_union": 65.425, "num_word_context": 202.1225, "num_word_doc": 49.6557, "num_word_query": 28.1453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1298.0366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 1.3694, "queue_k_norm": 1.4952, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5644, "sent_len_1": 66.6037, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3363, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3347, "doc_norm": 1.4906, "encoder_q-embeddings": 641.1691, "encoder_q-layer.0": 454.4868, "encoder_q-layer.1": 478.7174, "encoder_q-layer.10": 689.6072, "encoder_q-layer.11": 1708.0645, "encoder_q-layer.2": 559.9207, "encoder_q-layer.3": 547.0773, "encoder_q-layer.4": 585.4466, "encoder_q-layer.5": 626.6555, "encoder_q-layer.6": 591.2962, "encoder_q-layer.7": 623.0279, "encoder_q-layer.8": 697.4255, "encoder_q-layer.9": 609.063, "epoch": 0.95, "inbatch_neg_score": 0.4014, "inbatch_pos_score": 1.04, "learning_rate": 1.2777777777777779e-06, "loss": 3.3347, "norm_diff": 0.1217, "norm_loss": 0.0, "num_token_doc": 67.0036, "num_token_overlap": 14.5797, "num_token_query": 37.2524, "num_token_union": 65.4748, "num_word_context": 202.5487, "num_word_doc": 49.9475, "num_word_query": 27.8775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1158.6123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 1.369, "queue_k_norm": 1.4962, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2524, "sent_len_1": 67.0036, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9875, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3163, "doc_norm": 1.4979, "encoder_q-embeddings": 833.7689, "encoder_q-layer.0": 576.9667, "encoder_q-layer.1": 671.0726, "encoder_q-layer.10": 574.084, "encoder_q-layer.11": 1615.6085, "encoder_q-layer.2": 693.5945, "encoder_q-layer.3": 683.0078, "encoder_q-layer.4": 624.6373, "encoder_q-layer.5": 598.9026, "encoder_q-layer.6": 548.5403, "encoder_q-layer.7": 577.0023, "encoder_q-layer.8": 632.7271, "encoder_q-layer.9": 577.3373, "epoch": 0.95, "inbatch_neg_score": 0.4015, "inbatch_pos_score": 1.0508, "learning_rate": 1.2222222222222223e-06, "loss": 3.3163, "norm_diff": 0.1272, "norm_loss": 0.0, "num_token_doc": 66.7332, "num_token_overlap": 14.65, "num_token_query": 37.5468, "num_token_union": 65.4365, "num_word_context": 202.576, "num_word_doc": 49.8046, "num_word_query": 28.1501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1206.3063, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4016, "query_norm": 1.3706, "queue_k_norm": 1.4952, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.5468, "sent_len_1": 66.7332, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4487, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.3187, "doc_norm": 1.4978, "encoder_q-embeddings": 744.942, "encoder_q-layer.0": 492.7755, "encoder_q-layer.1": 539.5988, "encoder_q-layer.10": 631.5162, "encoder_q-layer.11": 1622.0691, "encoder_q-layer.2": 646.3784, "encoder_q-layer.3": 643.6904, "encoder_q-layer.4": 660.3494, "encoder_q-layer.5": 667.3834, "encoder_q-layer.6": 688.0552, "encoder_q-layer.7": 657.6101, "encoder_q-layer.8": 703.5432, "encoder_q-layer.9": 585.5787, "epoch": 0.96, "inbatch_neg_score": 0.4015, "inbatch_pos_score": 1.0527, "learning_rate": 1.1666666666666668e-06, "loss": 3.3187, "norm_diff": 0.126, "norm_loss": 0.0, "num_token_doc": 66.7094, "num_token_overlap": 14.6188, "num_token_query": 37.6837, "num_token_union": 65.5713, "num_word_context": 202.5333, "num_word_doc": 49.8065, "num_word_query": 28.2653, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1192.7279, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4009, "query_norm": 1.3718, "queue_k_norm": 1.4957, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.6837, "sent_len_1": 66.7094, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.4387, "stdk": 0.0489, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3418, "doc_norm": 1.492, "encoder_q-embeddings": 500.1759, "encoder_q-layer.0": 337.7553, "encoder_q-layer.1": 353.5702, "encoder_q-layer.10": 632.6302, "encoder_q-layer.11": 1649.0657, "encoder_q-layer.2": 402.3937, "encoder_q-layer.3": 420.7309, "encoder_q-layer.4": 445.3535, "encoder_q-layer.5": 442.1859, "encoder_q-layer.6": 488.2398, "encoder_q-layer.7": 516.7366, "encoder_q-layer.8": 642.6884, "encoder_q-layer.9": 549.6379, "epoch": 0.96, "inbatch_neg_score": 0.4006, "inbatch_pos_score": 1.0459, "learning_rate": 1.1111111111111112e-06, "loss": 3.3418, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.8925, "num_token_overlap": 14.6423, "num_token_query": 37.4199, "num_token_union": 65.4707, "num_word_context": 202.433, "num_word_doc": 49.9163, "num_word_query": 28.0453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1047.4453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 1.37, "queue_k_norm": 1.4959, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.4199, "sent_len_1": 66.8925, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.6675, "stdk": 0.0487, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3479, "doc_norm": 1.4993, "encoder_q-embeddings": 5063.978, "encoder_q-layer.0": 3314.9658, "encoder_q-layer.1": 4169.1797, "encoder_q-layer.10": 1245.6837, "encoder_q-layer.11": 3145.771, "encoder_q-layer.2": 5137.7744, "encoder_q-layer.3": 5292.314, "encoder_q-layer.4": 5705.2505, "encoder_q-layer.5": 6221.293, "encoder_q-layer.6": 6663.6846, "encoder_q-layer.7": 5737.8691, "encoder_q-layer.8": 3357.2288, "encoder_q-layer.9": 1487.8348, "epoch": 0.96, "inbatch_neg_score": 0.403, "inbatch_pos_score": 1.0527, "learning_rate": 1.0555555555555557e-06, "loss": 3.3479, "norm_diff": 0.1306, "norm_loss": 0.0, "num_token_doc": 66.6411, "num_token_overlap": 14.5201, "num_token_query": 37.165, "num_token_union": 65.2231, "num_word_context": 201.9823, "num_word_doc": 49.6763, "num_word_query": 27.8141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6958.0247, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4028, "query_norm": 1.3687, "queue_k_norm": 1.4955, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.165, "sent_len_1": 66.6411, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8562, "stdk": 0.049, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3379, "doc_norm": 1.4936, "encoder_q-embeddings": 1146.2275, "encoder_q-layer.0": 770.7363, "encoder_q-layer.1": 842.2026, "encoder_q-layer.10": 1156.7075, "encoder_q-layer.11": 3236.1899, "encoder_q-layer.2": 907.6622, "encoder_q-layer.3": 968.694, "encoder_q-layer.4": 1008.1465, "encoder_q-layer.5": 1011.4435, "encoder_q-layer.6": 1083.9384, "encoder_q-layer.7": 1177.9313, "encoder_q-layer.8": 1313.0482, "encoder_q-layer.9": 1171.7385, "epoch": 0.96, "inbatch_neg_score": 0.404, "inbatch_pos_score": 1.0508, "learning_rate": 1.0000000000000002e-06, "loss": 3.3379, "norm_diff": 0.1233, "norm_loss": 0.0, "num_token_doc": 66.7568, "num_token_overlap": 14.6026, "num_token_query": 37.3643, "num_token_union": 65.3897, "num_word_context": 202.417, "num_word_doc": 49.8184, "num_word_query": 27.9475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2164.8518, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4036, "query_norm": 1.3704, "queue_k_norm": 1.4977, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3643, "sent_len_1": 66.7568, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.09, "stdk": 0.0487, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3213, "doc_norm": 1.5017, "encoder_q-embeddings": 932.6409, "encoder_q-layer.0": 635.3461, "encoder_q-layer.1": 670.8177, "encoder_q-layer.10": 1232.3744, "encoder_q-layer.11": 3280.1978, "encoder_q-layer.2": 727.7187, "encoder_q-layer.3": 757.6779, "encoder_q-layer.4": 760.3045, "encoder_q-layer.5": 773.4087, "encoder_q-layer.6": 905.0308, "encoder_q-layer.7": 1043.9629, "encoder_q-layer.8": 1243.7832, "encoder_q-layer.9": 1155.1266, "epoch": 0.96, "inbatch_neg_score": 0.404, "inbatch_pos_score": 1.0654, "learning_rate": 9.444444444444445e-07, "loss": 3.3213, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.7559, "num_token_overlap": 14.6107, "num_token_query": 37.4526, "num_token_union": 65.4297, "num_word_context": 202.2555, "num_word_doc": 49.8389, "num_word_query": 28.0139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.9815, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4036, "query_norm": 1.3797, "queue_k_norm": 1.4957, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.4526, "sent_len_1": 66.7559, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0488, "stdk": 0.0491, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.345, "doc_norm": 1.4979, "encoder_q-embeddings": 1244.1559, "encoder_q-layer.0": 875.5045, "encoder_q-layer.1": 953.3383, "encoder_q-layer.10": 1231.1318, "encoder_q-layer.11": 3368.1895, "encoder_q-layer.2": 1112.3715, "encoder_q-layer.3": 1121.9899, "encoder_q-layer.4": 1153.5848, "encoder_q-layer.5": 1207.2386, "encoder_q-layer.6": 1332.9912, "encoder_q-layer.7": 1372.1752, "encoder_q-layer.8": 1411.0762, "encoder_q-layer.9": 1227.8628, "epoch": 0.96, "inbatch_neg_score": 0.4033, "inbatch_pos_score": 1.0479, "learning_rate": 8.88888888888889e-07, "loss": 3.345, "norm_diff": 0.1355, "norm_loss": 0.0, "num_token_doc": 66.9339, "num_token_overlap": 14.5583, "num_token_query": 37.2147, "num_token_union": 65.4178, "num_word_context": 202.3546, "num_word_doc": 49.9075, "num_word_query": 27.813, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2323.9524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4026, "query_norm": 1.3624, "queue_k_norm": 1.4976, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.2147, "sent_len_1": 66.9339, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.9525, "stdk": 0.0489, "stdq": 0.0438, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3317, "doc_norm": 1.5004, "encoder_q-embeddings": 1290.5964, "encoder_q-layer.0": 856.4247, "encoder_q-layer.1": 929.2294, "encoder_q-layer.10": 1302.0306, "encoder_q-layer.11": 3240.708, "encoder_q-layer.2": 1120.6661, "encoder_q-layer.3": 1119.4341, "encoder_q-layer.4": 1166.9243, "encoder_q-layer.5": 1176.0052, "encoder_q-layer.6": 1228.0216, "encoder_q-layer.7": 1251.2513, "encoder_q-layer.8": 1290.0901, "encoder_q-layer.9": 1131.5343, "epoch": 0.96, "inbatch_neg_score": 0.4026, "inbatch_pos_score": 1.0635, "learning_rate": 8.333333333333333e-07, "loss": 3.3317, "norm_diff": 0.1291, "norm_loss": 0.0, "num_token_doc": 66.9053, "num_token_overlap": 14.6139, "num_token_query": 37.319, "num_token_union": 65.4319, "num_word_context": 202.4429, "num_word_doc": 49.9031, "num_word_query": 27.9472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2271.4231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4031, "query_norm": 1.3713, "queue_k_norm": 1.4968, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.319, "sent_len_1": 66.9053, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8475, "stdk": 0.049, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.3488, "doc_norm": 1.4951, "encoder_q-embeddings": 1199.472, "encoder_q-layer.0": 820.4883, "encoder_q-layer.1": 901.9635, "encoder_q-layer.10": 1194.0944, "encoder_q-layer.11": 3321.7957, "encoder_q-layer.2": 1012.0731, "encoder_q-layer.3": 1060.4424, "encoder_q-layer.4": 1025.0996, "encoder_q-layer.5": 1060.3956, "encoder_q-layer.6": 998.3825, "encoder_q-layer.7": 1051.8065, "encoder_q-layer.8": 1176.05, "encoder_q-layer.9": 1073.8557, "epoch": 0.96, "inbatch_neg_score": 0.402, "inbatch_pos_score": 1.0439, "learning_rate": 7.777777777777778e-07, "loss": 3.3488, "norm_diff": 0.1247, "norm_loss": 0.0, "num_token_doc": 66.8051, "num_token_overlap": 14.5542, "num_token_query": 37.1366, "num_token_union": 65.3596, "num_word_context": 202.3171, "num_word_doc": 49.8587, "num_word_query": 27.7904, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2192.9374, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4028, "query_norm": 1.3704, "queue_k_norm": 1.4976, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.1366, "sent_len_1": 66.8051, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7512, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3322, "doc_norm": 1.4952, "encoder_q-embeddings": 1591.486, "encoder_q-layer.0": 1126.8719, "encoder_q-layer.1": 1207.9463, "encoder_q-layer.10": 1209.9741, "encoder_q-layer.11": 3317.353, "encoder_q-layer.2": 1354.3689, "encoder_q-layer.3": 1449.5554, "encoder_q-layer.4": 1515.7195, "encoder_q-layer.5": 1530.1989, "encoder_q-layer.6": 1600.3828, "encoder_q-layer.7": 1556.2781, "encoder_q-layer.8": 1525.7887, "encoder_q-layer.9": 1205.7328, "epoch": 0.96, "inbatch_neg_score": 0.4047, "inbatch_pos_score": 1.0361, "learning_rate": 7.222222222222222e-07, "loss": 3.3322, "norm_diff": 0.1227, "norm_loss": 0.0, "num_token_doc": 66.7298, "num_token_overlap": 14.648, "num_token_query": 37.5103, "num_token_union": 65.4148, "num_word_context": 202.3264, "num_word_doc": 49.7968, "num_word_query": 28.1081, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2535.3759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4038, "query_norm": 1.3725, "queue_k_norm": 1.4952, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.5103, "sent_len_1": 66.7298, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0412, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3282, "doc_norm": 1.497, "encoder_q-embeddings": 1533.9742, "encoder_q-layer.0": 1171.5331, "encoder_q-layer.1": 1344.5797, "encoder_q-layer.10": 1224.377, "encoder_q-layer.11": 3360.9778, "encoder_q-layer.2": 1513.2252, "encoder_q-layer.3": 1591.5232, "encoder_q-layer.4": 1756.0487, "encoder_q-layer.5": 1603.9756, "encoder_q-layer.6": 1709.6042, "encoder_q-layer.7": 1853.7765, "encoder_q-layer.8": 1620.4344, "encoder_q-layer.9": 1147.7905, "epoch": 0.96, "inbatch_neg_score": 0.4031, "inbatch_pos_score": 1.0283, "learning_rate": 6.666666666666667e-07, "loss": 3.3282, "norm_diff": 0.1285, "norm_loss": 0.0, "num_token_doc": 66.9328, "num_token_overlap": 14.6099, "num_token_query": 37.5062, "num_token_union": 65.5507, "num_word_context": 202.5469, "num_word_doc": 49.9324, "num_word_query": 28.0999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2672.7136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4026, "query_norm": 1.3685, "queue_k_norm": 1.4981, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.5062, "sent_len_1": 66.9328, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0925, "stdk": 0.0489, "stdq": 0.044, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3406, "doc_norm": 1.5006, "encoder_q-embeddings": 1095.2229, "encoder_q-layer.0": 728.6125, "encoder_q-layer.1": 766.6138, "encoder_q-layer.10": 1398.202, "encoder_q-layer.11": 3618.4302, "encoder_q-layer.2": 882.9345, "encoder_q-layer.3": 910.3416, "encoder_q-layer.4": 918.2994, "encoder_q-layer.5": 939.7857, "encoder_q-layer.6": 1008.7042, "encoder_q-layer.7": 1094.4241, "encoder_q-layer.8": 1389.6472, "encoder_q-layer.9": 1319.8818, "epoch": 0.97, "inbatch_neg_score": 0.4014, "inbatch_pos_score": 1.0664, "learning_rate": 6.111111111111112e-07, "loss": 3.3406, "norm_diff": 0.1353, "norm_loss": 0.0, "num_token_doc": 66.9016, "num_token_overlap": 14.5918, "num_token_query": 37.2741, "num_token_union": 65.4683, "num_word_context": 202.5171, "num_word_doc": 49.9846, "num_word_query": 27.9118, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2187.9594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4021, "query_norm": 1.3653, "queue_k_norm": 1.4958, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.2741, "sent_len_1": 66.9016, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.4062, "stdk": 0.049, "stdq": 0.0439, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.313, "doc_norm": 1.5013, "encoder_q-embeddings": 1218.1766, "encoder_q-layer.0": 849.2142, "encoder_q-layer.1": 915.9538, "encoder_q-layer.10": 1286.1492, "encoder_q-layer.11": 3261.5857, "encoder_q-layer.2": 1052.6886, "encoder_q-layer.3": 1115.1292, "encoder_q-layer.4": 1182.0081, "encoder_q-layer.5": 1172.4731, "encoder_q-layer.6": 1293.9938, "encoder_q-layer.7": 1229.0103, "encoder_q-layer.8": 1384.3943, "encoder_q-layer.9": 1207.0411, "epoch": 0.97, "inbatch_neg_score": 0.4029, "inbatch_pos_score": 1.0635, "learning_rate": 5.555555555555556e-07, "loss": 3.313, "norm_diff": 0.124, "norm_loss": 0.0, "num_token_doc": 66.8099, "num_token_overlap": 14.6807, "num_token_query": 37.3538, "num_token_union": 65.3123, "num_word_context": 202.22, "num_word_doc": 49.8265, "num_word_query": 27.9599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2240.8654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4023, "query_norm": 1.3773, "queue_k_norm": 1.4966, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.3538, "sent_len_1": 66.8099, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.2713, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3381, "doc_norm": 1.4978, "encoder_q-embeddings": 785.1959, "encoder_q-layer.0": 559.543, "encoder_q-layer.1": 672.9274, "encoder_q-layer.10": 610.7128, "encoder_q-layer.11": 1676.0979, "encoder_q-layer.2": 816.4293, "encoder_q-layer.3": 795.3775, "encoder_q-layer.4": 696.374, "encoder_q-layer.5": 550.92, "encoder_q-layer.6": 576.6435, "encoder_q-layer.7": 583.9872, "encoder_q-layer.8": 626.4753, "encoder_q-layer.9": 566.2499, "epoch": 0.97, "inbatch_neg_score": 0.4024, "inbatch_pos_score": 1.0732, "learning_rate": 5.000000000000001e-07, "loss": 3.3381, "norm_diff": 0.1182, "norm_loss": 0.0, "num_token_doc": 66.8748, "num_token_overlap": 14.6467, "num_token_query": 37.3726, "num_token_union": 65.408, "num_word_context": 202.141, "num_word_doc": 49.9069, "num_word_query": 27.9947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1258.9891, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4026, "query_norm": 1.3796, "queue_k_norm": 1.4966, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3726, "sent_len_1": 66.8748, "sent_len_max_0": 128.0, "sent_len_max_1": 188.475, "stdk": 0.0489, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.3378, "doc_norm": 1.4969, "encoder_q-embeddings": 8861.0186, "encoder_q-layer.0": 6724.0874, "encoder_q-layer.1": 7527.833, "encoder_q-layer.10": 598.5524, "encoder_q-layer.11": 1610.9363, "encoder_q-layer.2": 9184.6523, "encoder_q-layer.3": 8168.8384, "encoder_q-layer.4": 7152.6191, "encoder_q-layer.5": 6741.5947, "encoder_q-layer.6": 6598.9302, "encoder_q-layer.7": 6350.4517, "encoder_q-layer.8": 3461.0154, "encoder_q-layer.9": 852.2898, "epoch": 0.97, "inbatch_neg_score": 0.4023, "inbatch_pos_score": 1.0439, "learning_rate": 4.444444444444445e-07, "loss": 3.3378, "norm_diff": 0.1216, "norm_loss": 0.0, "num_token_doc": 66.9552, "num_token_overlap": 14.6977, "num_token_query": 37.6561, "num_token_union": 65.5465, "num_word_context": 202.5211, "num_word_doc": 49.9306, "num_word_query": 28.2016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9896.1631, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4028, "query_norm": 1.3753, "queue_k_norm": 1.4964, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.6561, "sent_len_1": 66.9552, "sent_len_max_0": 128.0, "sent_len_max_1": 190.69, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.34, "doc_norm": 1.4998, "encoder_q-embeddings": 454.6143, "encoder_q-layer.0": 310.2392, "encoder_q-layer.1": 327.1616, "encoder_q-layer.10": 668.7178, "encoder_q-layer.11": 1592.0732, "encoder_q-layer.2": 361.011, "encoder_q-layer.3": 381.7837, "encoder_q-layer.4": 390.8565, "encoder_q-layer.5": 394.09, "encoder_q-layer.6": 452.5547, "encoder_q-layer.7": 565.6622, "encoder_q-layer.8": 633.1414, "encoder_q-layer.9": 571.9823, "epoch": 0.97, "inbatch_neg_score": 0.4036, "inbatch_pos_score": 1.0811, "learning_rate": 3.888888888888889e-07, "loss": 3.34, "norm_diff": 0.1268, "norm_loss": 0.0, "num_token_doc": 66.8752, "num_token_overlap": 14.618, "num_token_query": 37.306, "num_token_union": 65.414, "num_word_context": 202.4955, "num_word_doc": 49.9402, "num_word_query": 27.9456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1007.0526, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.3729, "queue_k_norm": 1.496, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.306, "sent_len_1": 66.8752, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7725, "stdk": 0.049, "stdq": 0.0442, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.3236, "doc_norm": 1.4974, "encoder_q-embeddings": 628.6856, "encoder_q-layer.0": 440.6855, "encoder_q-layer.1": 462.3517, "encoder_q-layer.10": 820.9697, "encoder_q-layer.11": 1693.3723, "encoder_q-layer.2": 550.9857, "encoder_q-layer.3": 541.465, "encoder_q-layer.4": 554.9022, "encoder_q-layer.5": 542.9819, "encoder_q-layer.6": 627.044, "encoder_q-layer.7": 623.4212, "encoder_q-layer.8": 666.1971, "encoder_q-layer.9": 620.1962, "epoch": 0.97, "inbatch_neg_score": 0.4017, "inbatch_pos_score": 1.0791, "learning_rate": 3.3333333333333335e-07, "loss": 3.3236, "norm_diff": 0.1176, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 14.5766, "num_token_query": 37.2763, "num_token_union": 65.4319, "num_word_context": 202.1661, "num_word_doc": 49.9077, "num_word_query": 27.8969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1143.8199, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 1.3798, "queue_k_norm": 1.4959, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.2763, "sent_len_1": 66.883, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.4775, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.3295, "doc_norm": 1.5035, "encoder_q-embeddings": 621.9711, "encoder_q-layer.0": 437.2802, "encoder_q-layer.1": 439.3968, "encoder_q-layer.10": 748.7946, "encoder_q-layer.11": 1716.6226, "encoder_q-layer.2": 498.1688, "encoder_q-layer.3": 506.2567, "encoder_q-layer.4": 561.5174, "encoder_q-layer.5": 538.4111, "encoder_q-layer.6": 545.4726, "encoder_q-layer.7": 631.6844, "encoder_q-layer.8": 703.5829, "encoder_q-layer.9": 605.2529, "epoch": 0.97, "inbatch_neg_score": 0.4021, "inbatch_pos_score": 1.0742, "learning_rate": 2.777777777777778e-07, "loss": 3.3295, "norm_diff": 0.1282, "norm_loss": 0.0, "num_token_doc": 66.584, "num_token_overlap": 14.6015, "num_token_query": 37.3869, "num_token_union": 65.3099, "num_word_context": 202.0036, "num_word_doc": 49.6848, "num_word_query": 27.9791, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1136.5639, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4026, "query_norm": 1.3753, "queue_k_norm": 1.496, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.3869, "sent_len_1": 66.584, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.1188, "stdk": 0.0492, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3383, "doc_norm": 1.4951, "encoder_q-embeddings": 690.8241, "encoder_q-layer.0": 477.4491, "encoder_q-layer.1": 536.84, "encoder_q-layer.10": 627.7515, "encoder_q-layer.11": 1646.2795, "encoder_q-layer.2": 651.1518, "encoder_q-layer.3": 646.8821, "encoder_q-layer.4": 697.1242, "encoder_q-layer.5": 748.5628, "encoder_q-layer.6": 819.1914, "encoder_q-layer.7": 783.1201, "encoder_q-layer.8": 749.6342, "encoder_q-layer.9": 600.2389, "epoch": 0.97, "inbatch_neg_score": 0.4062, "inbatch_pos_score": 1.04, "learning_rate": 2.2222222222222224e-07, "loss": 3.3383, "norm_diff": 0.1253, "norm_loss": 0.0, "num_token_doc": 66.7199, "num_token_overlap": 14.5985, "num_token_query": 37.3843, "num_token_union": 65.4038, "num_word_context": 202.1875, "num_word_doc": 49.7708, "num_word_query": 28.001, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1218.7802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.3698, "queue_k_norm": 1.4957, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.3843, "sent_len_1": 66.7199, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.365, "stdk": 0.0488, "stdq": 0.044, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.3305, "doc_norm": 1.5047, "encoder_q-embeddings": 993.2404, "encoder_q-layer.0": 764.5862, "encoder_q-layer.1": 875.2059, "encoder_q-layer.10": 630.6416, "encoder_q-layer.11": 1643.9819, "encoder_q-layer.2": 1034.4119, "encoder_q-layer.3": 1132.8662, "encoder_q-layer.4": 1220.4163, "encoder_q-layer.5": 1469.6998, "encoder_q-layer.6": 1483.093, "encoder_q-layer.7": 1494.0159, "encoder_q-layer.8": 1040.9624, "encoder_q-layer.9": 611.1596, "epoch": 0.97, "inbatch_neg_score": 0.4061, "inbatch_pos_score": 1.082, "learning_rate": 1.6666666666666668e-07, "loss": 3.3305, "norm_diff": 0.1251, "norm_loss": 0.0, "num_token_doc": 66.7938, "num_token_overlap": 14.6109, "num_token_query": 37.4035, "num_token_union": 65.3952, "num_word_context": 202.2297, "num_word_doc": 49.8521, "num_word_query": 28.0182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1724.8885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.3796, "queue_k_norm": 1.498, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 37.4035, "sent_len_1": 66.7938, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7625, "stdk": 0.0492, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.3668, "doc_norm": 1.495, "encoder_q-embeddings": 617.151, "encoder_q-layer.0": 432.643, "encoder_q-layer.1": 489.8413, "encoder_q-layer.10": 624.6266, "encoder_q-layer.11": 1648.5497, "encoder_q-layer.2": 572.3122, "encoder_q-layer.3": 572.9678, "encoder_q-layer.4": 575.0427, "encoder_q-layer.5": 585.2935, "encoder_q-layer.6": 594.2769, "encoder_q-layer.7": 577.3383, "encoder_q-layer.8": 595.4269, "encoder_q-layer.9": 587.5322, "epoch": 0.97, "inbatch_neg_score": 0.4034, "inbatch_pos_score": 1.042, "learning_rate": 1.1111111111111112e-07, "loss": 3.3668, "norm_diff": 0.1311, "norm_loss": 0.0, "num_token_doc": 66.6284, "num_token_overlap": 14.4634, "num_token_query": 37.0681, "num_token_union": 65.2305, "num_word_context": 202.2879, "num_word_doc": 49.721, "num_word_query": 27.7386, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1123.7762, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4036, "query_norm": 1.3639, "queue_k_norm": 1.4957, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 37.0681, "sent_len_1": 66.6284, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.0225, "stdk": 0.0488, "stdq": 0.0438, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3377, "doc_norm": 1.5019, "encoder_q-embeddings": 1238.9415, "encoder_q-layer.0": 905.081, "encoder_q-layer.1": 1126.2141, "encoder_q-layer.10": 586.6034, "encoder_q-layer.11": 1526.5989, "encoder_q-layer.2": 1217.277, "encoder_q-layer.3": 1428.0168, "encoder_q-layer.4": 1316.8176, "encoder_q-layer.5": 1031.7825, "encoder_q-layer.6": 878.8752, "encoder_q-layer.7": 875.0015, "encoder_q-layer.8": 710.6708, "encoder_q-layer.9": 559.759, "epoch": 0.98, "inbatch_neg_score": 0.4023, "inbatch_pos_score": 1.0781, "learning_rate": 5.555555555555556e-08, "loss": 3.3377, "norm_diff": 0.1225, "norm_loss": 0.0, "num_token_doc": 66.803, "num_token_overlap": 14.5223, "num_token_query": 37.11, "num_token_union": 65.3253, "num_word_context": 202.3372, "num_word_doc": 49.862, "num_word_query": 27.7627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1631.8498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4038, "query_norm": 1.3794, "queue_k_norm": 1.4976, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 37.11, "sent_len_1": 66.803, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8587, "stdk": 0.0491, "stdq": 0.0444, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3355, "doc_norm": 1.4996, "encoder_q-embeddings": 623.5296, "encoder_q-layer.0": 410.9715, "encoder_q-layer.1": 449.6954, "encoder_q-layer.10": 614.0139, "encoder_q-layer.11": 1639.0892, "encoder_q-layer.2": 565.4484, "encoder_q-layer.3": 577.2371, "encoder_q-layer.4": 576.3525, "encoder_q-layer.5": 525.1705, "encoder_q-layer.6": 593.5039, "encoder_q-layer.7": 626.1305, "encoder_q-layer.8": 694.4844, "encoder_q-layer.9": 574.6757, "epoch": 0.98, "inbatch_neg_score": 0.4039, "inbatch_pos_score": 1.0508, "learning_rate": 0.0, "loss": 3.3355, "norm_diff": 0.1277, "norm_loss": 0.0, "num_token_doc": 66.6829, "num_token_overlap": 14.5917, "num_token_query": 37.286, "num_token_union": 65.2664, "num_word_context": 202.4196, "num_word_doc": 49.7781, "num_word_query": 27.8969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1121.4987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4031, "query_norm": 1.3719, "queue_k_norm": 1.4958, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 37.286, "sent_len_1": 66.6829, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.1037, "stdk": 0.049, "stdq": 0.0441, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 29.2492, "dev_samples_per_second": 2.188, "dev_steps_per_second": 0.034, "epoch": 0.98, "step": 100000, "test_accuracy": 93.505859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.376753032207489, "test_doc_norm": 1.4767688512802124, "test_inbatch_neg_score": 0.7561978101730347, "test_inbatch_pos_score": 1.7162816524505615, "test_loss": 0.376753032207489, "test_loss_align": 0.9633158445358276, "test_loss_unif": 3.67779541015625, "test_loss_unif_q@queue": 3.677795648574829, "test_norm_diff": 0.008980831131339073, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3969300091266632, "test_query_norm": 1.4777231216430664, "test_queue_k_norm": 1.496384859085083, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042948462069034576, "test_stdq": 0.04292980581521988, "test_stdqueue_k": 0.048965614289045334, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.2492, "dev_samples_per_second": 2.188, "dev_steps_per_second": 0.034, "epoch": 0.98, "eval_beir-arguana_ndcg@10": 0.38885, "eval_beir-arguana_recall@10": 0.6522, "eval_beir-arguana_recall@100": 0.93741, "eval_beir-arguana_recall@20": 0.78592, "eval_beir-avg_ndcg@10": 0.3856605833333333, "eval_beir-avg_recall@10": 0.45172575000000004, "eval_beir-avg_recall@100": 0.6350983333333333, "eval_beir-avg_recall@20": 0.5135079166666667, "eval_beir-cqadupstack_ndcg@10": 0.27400583333333334, "eval_beir-cqadupstack_recall@10": 0.3683874999999999, "eval_beir-cqadupstack_recall@100": 0.6010533333333333, "eval_beir-cqadupstack_recall@20": 0.43202916666666685, "eval_beir-fiqa_ndcg@10": 0.23933, "eval_beir-fiqa_recall@10": 0.29241, "eval_beir-fiqa_recall@100": 0.56617, "eval_beir-fiqa_recall@20": 0.37557, "eval_beir-nfcorpus_ndcg@10": 0.31333, "eval_beir-nfcorpus_recall@10": 0.15154, "eval_beir-nfcorpus_recall@100": 0.29552, "eval_beir-nfcorpus_recall@20": 0.18654, "eval_beir-nq_ndcg@10": 0.27242, "eval_beir-nq_recall@10": 0.45355, "eval_beir-nq_recall@100": 0.79193, "eval_beir-nq_recall@20": 0.56969, "eval_beir-quora_ndcg@10": 0.77732, "eval_beir-quora_recall@10": 0.88759, "eval_beir-quora_recall@100": 0.97812, "eval_beir-quora_recall@20": 0.9305, "eval_beir-scidocs_ndcg@10": 0.1498, "eval_beir-scidocs_recall@10": 0.15677, "eval_beir-scidocs_recall@100": 0.36587, "eval_beir-scidocs_recall@20": 0.21367, "eval_beir-scifact_ndcg@10": 0.66223, "eval_beir-scifact_recall@10": 0.79411, "eval_beir-scifact_recall@100": 0.90156, "eval_beir-scifact_recall@20": 0.82411, "eval_beir-trec-covid_ndcg@10": 0.58943, "eval_beir-trec-covid_recall@10": 0.622, "eval_beir-trec-covid_recall@100": 0.4602, "eval_beir-trec-covid_recall@20": 0.606, "eval_beir-webis-touche2020_ndcg@10": 0.18989, "eval_beir-webis-touche2020_recall@10": 0.1387, "eval_beir-webis-touche2020_recall@100": 0.45315, "eval_beir-webis-touche2020_recall@20": 0.21105, "eval_senteval-avg_sts": 0.7453523475765869, "eval_senteval-sickr_spearman": 0.7176593438481393, "eval_senteval-stsb_spearman": 0.7730453513050343, "step": 100000, "test_accuracy": 93.505859375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.376753032207489, "test_doc_norm": 1.4767688512802124, "test_inbatch_neg_score": 0.7561978101730347, "test_inbatch_pos_score": 1.7162816524505615, "test_loss": 0.376753032207489, "test_loss_align": 0.9633158445358276, "test_loss_unif": 3.67779541015625, "test_loss_unif_q@queue": 3.677795648574829, "test_norm_diff": 0.008980831131339073, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3969300091266632, "test_query_norm": 1.4777231216430664, "test_queue_k_norm": 1.496384859085083, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042948462069034576, "test_stdq": 0.04292980581521988, "test_stdqueue_k": 0.048965614289045334, "test_stdqueue_q": 0.0 }, { "epoch": 0.98, "step": 100000, "total_flos": 0, "train_runtime": 77467.0129, "train_samples_per_second": 1.291 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }