yihang7 committed on Jan 16

Commit

c5a5546

•

1 Parent(s): 90a4dbd

Model save

Browse files

Files changed (17) hide show

README.md +157 -0
all_results.json +21 -0
config.json +25 -0
eval_results.json +16 -0
generation_config.json +6 -0
model-00001-of-00003.safetensors +3 -0
model-00002-of-00003.safetensors +3 -0
model-00003-of-00003.safetensors +3 -0
model.safetensors.index.json +298 -0
runs/Jan15_19-26-58_amaterasu/events.out.tfevents.1705346844.amaterasu.2386471.0 +3 -0
runs/Jan15_19-26-58_amaterasu/events.out.tfevents.1705441876.amaterasu.2386471.1 +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer_config.json +41 -0
train_results.json +8 -0
trainer_state.json +0 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,157 @@

+---
+license: apache-2.0
+base_model: mistralai/Mistral-7B-v0.1
+tags:
+- generated_from_trainer
+model-index:
+- name: Mistral-7B-v0.1-dpo-full-hydrox-safe
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Mistral-7B-v0.1-dpo-full-hydrox-safe
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0029
+- Rewards/chosen: 1.4965
+- Rewards/rejected: -33.3018
+- Rewards/accuracies: 0.9992
+- Rewards/margins: 34.7984
+- Logps/rejected: -735.1938
+- Logps/chosen: -247.9914
+- Logits/rejected: -2.7547
+- Logits/chosen: -2.9524
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
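+More information needed. The accompanying `all_results.json` reports 188,284 training samples and 9,500 evaluation samples.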
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-07
+- train_batch_size: 8
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 8
+- total_train_batch_size: 64
+- total_eval_batch_size: 32
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.2048        | 0.03  | 100  | 0.1816          | 1.0172         | -1.5955          | 0.9529             | 2.6127          | -418.1306      | -252.7848    | -2.6883         | -2.7595       |
+| 0.1279        | 0.07  | 200  | 0.1099          | 1.8935         | -2.9215          | 0.9621             | 4.8150          | -431.3906      | -244.0213    | -2.6659         | -2.7438       |
+| 0.075         | 0.1   | 300  | 0.1084          | 2.6393         | -3.1267          | 0.9790             | 5.7660          | -433.4421      | -236.5630    | -2.6463         | -2.7255       |
+| 0.0656        | 0.14  | 400  | 0.0670          | 2.6754         | -4.3127          | 0.9840             | 6.9881          | -445.3022      | -236.2028    | -2.6321         | -2.7143       |
+| 0.0314        | 0.17  | 500  | 0.0444          | 2.7614         | -5.7538          | 0.9857             | 8.5151          | -459.7131      | -235.3429    | -2.6533         | -2.7261       |
+| 0.0569        | 0.2   | 600  | 0.0820          | 1.8055         | -9.1246          | 0.9781             | 10.9302         | -493.4217      | -244.9012    | -2.6261         | -2.7256       |
+| 0.0154        | 0.24  | 700  | 0.0703          | 1.5280         | -13.2088         | 0.9857             | 14.7368         | -534.2635      | -247.6769    | -2.5799         | -2.6689       |
+| 0.032         | 0.27  | 800  | 0.0583          | 1.4588         | -11.3987         | 0.9891             | 12.8575         | -516.1622      | -248.3688    | -2.5761         | -2.6670       |
+| 0.0376        | 0.31  | 900  | 0.0440          | 0.8776         | -15.7795         | 0.9924             | 16.6572         | -559.9705      | -254.1801    | -2.5745         | -2.6553       |
+| 0.1198        | 0.34  | 1000 | 0.0460          | 0.7615         | -22.5125         | 0.9933             | 23.2740         | -627.3004      | -255.3416    | -2.6308         | -2.7357       |
+| 0.0438        | 0.37  | 1100 | 0.0293          | 1.1199         | -14.6644         | 0.9949             | 15.7842         | -548.8188      | -251.7577    | -2.7728         | -2.8744       |
+| 0.0368        | 0.41  | 1200 | 0.0349          | 1.4988         | -18.6893         | 0.9924             | 20.1881         | -589.0681      | -247.9686    | -2.6827         | -2.7834       |
+| 0.0218        | 0.44  | 1300 | 0.1406          | 1.9168         | -13.4986         | 0.9739             | 15.4154         | -537.1611      | -243.7885    | -2.4356         | -2.5455       |
+| 0.0302        | 0.48  | 1400 | 0.0197          | 0.3550         | -20.3640         | 0.9941             | 20.7190         | -605.8153      | -259.4061    | -2.5041         | -2.5799       |
+| 0.0114        | 0.51  | 1500 | 0.0231          | 1.0578         | -17.2156         | 0.9958             | 18.2735         | -574.3317      | -252.3781    | -2.4380         | -2.5036       |
+| 0.0108        | 0.54  | 1600 | 0.0267          | 0.7739         | -19.6130         | 0.9966             | 20.3868         | -598.3051      | -255.2180    | -2.5048         | -2.6017       |
+| 0.0142        | 0.58  | 1700 | 0.0431          | -0.7071        | -25.1890         | 0.9966             | 24.4819         | -654.0657      | -270.0278    | -2.6207         | -2.7770       |
+| 0.0367        | 0.61  | 1800 | 0.0242          | 0.5459         | -19.8798         | 0.9966             | 20.4258         | -600.9736      | -257.4970    | -2.6528         | -2.8159       |
+| 0.0123        | 0.65  | 1900 | 0.0170          | 0.2873         | -21.3637         | 0.9958             | 21.6510         | -615.8121      | -260.0836    | -2.7699         | -2.9451       |
+| 0.0279        | 0.68  | 2000 | 0.0238          | 1.3430         | -17.5880         | 0.9941             | 18.9309         | -578.0551      | -249.5269    | -2.6516         | -2.8237       |
+| 0.0049        | 0.71  | 2100 | 0.0199          | -0.1106        | -27.2864         | 0.9941             | 27.1758         | -675.0391      | -264.0627    | -2.6269         | -2.8199       |
+| 0.0028        | 0.75  | 2200 | 0.0181          | 0.1829         | -26.3264         | 0.9941             | 26.5094         | -665.4396      | -261.1270    | -2.7200         | -2.9244       |
+| 0.0166        | 0.78  | 2300 | 0.0194          | -0.3610        | -25.2795         | 0.9958             | 24.9185         | -654.9701      | -266.5665    | -2.6358         | -2.7824       |
+| 0.021         | 0.82  | 2400 | 0.0227          | 1.2726         | -24.2662         | 0.9983             | 25.5388         | -644.8376      | -250.2310    | -2.6171         | -2.7585       |
+| 0.014         | 0.85  | 2500 | 0.0168          | 0.1132         | -29.5821         | 0.9983             | 29.6953         | -697.9961      | -261.8245    | -2.6238         | -2.7765       |
+| 0.0035        | 0.88  | 2600 | 0.0267          | 0.7916         | -24.8514         | 0.9983             | 25.6430         | -650.6893      | -255.0403    | -2.5649         | -2.7397       |
+| 0.0208        | 0.92  | 2700 | 0.0090          | 0.5092         | -28.5463         | 0.9983             | 29.0555         | -687.6382      | -257.8649    | -2.5661         | -2.7543       |
+| 0.0253        | 0.95  | 2800 | 0.0103          | 0.8823         | -30.6264         | 0.9966             | 31.5087         | -708.4396      | -254.1338    | -2.6179         | -2.8054       |
+| 0.0028        | 0.99  | 2900 | 0.0112          | 1.2910         | -25.2950         | 0.9983             | 26.5860         | -655.1255      | -250.0463    | -2.6939         | -2.9014       |
+| 0.0019        | 1.02  | 3000 | 0.0149          | -0.5923        | -26.9350         | 0.9983             | 26.3427         | -671.5254      | -268.8800    | -2.6026         | -2.8588       |
+| 0.0013        | 1.05  | 3100 | 0.0120          | 0.2408         | -27.8722         | 0.9983             | 28.1130         | -680.8969      | -260.5484    | -2.7120         | -2.9730       |
+| 0.0014        | 1.09  | 3200 | 0.0098          | 1.7422         | -24.9625         | 0.9983             | 26.7047         | -651.8002      | -245.5344    | -2.7375         | -2.9656       |
+| 0.0007        | 1.12  | 3300 | 0.0158          | 1.3878         | -24.7373         | 0.9975             | 26.1251         | -649.5485      | -249.0786    | -2.7624         | -2.9950       |
+| 0.0032        | 1.16  | 3400 | 0.0130          | 0.9357         | -27.4065         | 0.9992             | 28.3422         | -676.2398      | -253.5992    | -2.7870         | -3.0069       |
+| 0.0008        | 1.19  | 3500 | 0.0107          | 0.7496         | -28.7658         | 0.9992             | 29.5154         | -689.8333      | -255.4606    | -2.7988         | -3.0340       |
+| 0.0004        | 1.22  | 3600 | 0.0091          | 0.6703         | -30.1929         | 0.9992             | 30.8631         | -704.1040      | -256.2537    | -2.7427         | -2.9740       |
+| 0.0039        | 1.26  | 3700 | 0.0098          | 0.5756         | -28.2500         | 0.9983             | 28.8255         | -684.6750      | -257.2008    | -2.7343         | -2.9647       |
+| 0.0017        | 1.29  | 3800 | 0.0068          | -0.0529        | -33.0047         | 0.9975             | 32.9518         | -732.2226      | -263.4854    | -2.7187         | -2.9478       |
+| 0.0043        | 1.33  | 3900 | 0.0061          | 0.2318         | -31.5623         | 0.9983             | 31.7940         | -717.7981      | -260.6389    | -2.7301         | -2.9546       |
+| 0.0044        | 1.36  | 4000 | 0.0061          | -0.4835        | -34.3777         | 0.9975             | 33.8941         | -745.9522      | -267.7920    | -2.7024         | -2.9245       |
+| 0.0008        | 1.39  | 4100 | 0.0048          | 0.4070         | -31.0079         | 0.9975             | 31.4149         | -712.2544      | -258.8862    | -2.6586         | -2.8669       |
+| 0.0013        | 1.43  | 4200 | 0.0067          | 0.2262         | -27.6468         | 0.9983             | 27.8730         | -678.6434      | -260.6947    | -2.6572         | -2.8776       |
+| 0.0139        | 1.46  | 4300 | 0.0089          | 0.1431         | -38.3175         | 0.9966             | 38.4606         | -785.3500      | -261.5256    | -2.6647         | -2.8567       |
+| 0.0005        | 1.5   | 4400 | 0.0052          | 1.1567         | -28.1924         | 0.9983             | 29.3490         | -684.0989      | -251.3895    | -2.6746         | -2.8756       |
+| 0.0017        | 1.53  | 4500 | 0.0048          | 0.9539         | -28.4808         | 0.9983             | 29.4348         | -686.9838      | -253.4173    | -2.6321         | -2.8397       |
+| 0.0076        | 1.56  | 4600 | 0.0053          | 1.2200         | -25.8289         | 0.9975             | 27.0489         | -660.4644      | -250.7563    | -2.6732         | -2.8768       |
+| 0.0073        | 1.6   | 4700 | 0.0037          | 0.5422         | -29.8961         | 0.9975             | 30.4383         | -701.1363      | -257.5345    | -2.6687         | -2.8635       |
+| 0.0064        | 1.63  | 4800 | 0.0058          | 0.1897         | -35.1189         | 0.9975             | 35.3086         | -753.3646      | -261.0594    | -2.8192         | -3.0418       |
+| 0.0027        | 1.67  | 4900 | 0.0035          | 1.0140         | -29.5701         | 0.9983             | 30.5841         | -697.8760      | -252.8162    | -2.7731         | -2.9981       |
+| 0.0004        | 1.7   | 5000 | 0.0025          | 1.5574         | -25.0463         | 0.9983             | 26.6037         | -652.6386      | -247.3826    | -2.7513         | -2.9753       |
+| 0.008         | 1.73  | 5100 | 0.0026          | 1.1263         | -30.3188         | 0.9983             | 31.4451         | -705.3633      | -251.6933    | -2.7995         | -3.0092       |
+| 0.0015        | 1.77  | 5200 | 0.0020          | 1.3119         | -29.0704         | 0.9983             | 30.3824         | -692.8795      | -249.8371    | -2.7971         | -3.0186       |
+| 0.0021        | 1.8   | 5300 | 0.0024          | 0.7331         | -31.3066         | 0.9975             | 32.0396         | -715.2409      | -255.6256    | -2.7968         | -2.9833       |
+| 0.0014        | 1.84  | 5400 | 0.0018          | 1.4142         | -28.7232         | 0.9975             | 30.1374         | -689.4075      | -248.8142    | -2.8127         | -2.9997       |
+| 0.0002        | 1.87  | 5500 | 0.0036          | 0.2662         | -33.0751         | 0.9975             | 33.3414         | -732.9266      | -260.2943    | -2.7900         | -2.9893       |
+| 0.0123        | 1.9   | 5600 | 0.0034          | 0.8622         | -29.0743         | 0.9983             | 29.9365         | -692.9180      | -254.3346    | -2.7901         | -2.9879       |
+| 0.0022        | 1.94  | 5700 | 0.0027          | 1.4619         | -26.1686         | 0.9983             | 27.6305         | -663.8615      | -248.3373    | -2.7543         | -2.9437       |
+| 0.0023        | 1.97  | 5800 | 0.0026          | 1.3065         | -29.0318         | 0.9992             | 30.3383         | -692.4929      | -249.8912    | -2.7777         | -2.9641       |
+| 0.0002        | 2.01  | 5900 | 0.0024          | 1.3466         | -31.8791         | 0.9992             | 33.2256         | -720.9660      | -249.4908    | -2.7919         | -2.9806       |
+| 0.0004        | 2.04  | 6000 | 0.0024          | 1.2456         | -33.3469         | 0.9992             | 34.5924         | -735.6440      | -250.5009    | -2.7884         | -2.9774       |
+| 0.0002        | 2.07  | 6100 | 0.0051          | 0.8107         | -37.2426         | 0.9983             | 38.0533         | -774.6013      | -254.8491    | -2.8039         | -2.9851       |
+| 0.0025        | 2.11  | 6200 | 0.0053          | 1.2104         | -32.6969         | 0.9983             | 33.9073         | -729.1440      | -250.8525    | -2.7910         | -2.9641       |
+| 0.001         | 2.14  | 6300 | 0.0050          | 1.5723         | -30.7677         | 0.9983             | 32.3400         | -709.8519      | -247.2332    | -2.7886         | -2.9728       |
+| 0.0062        | 2.18  | 6400 | 0.0062          | 1.0454         | -32.4375         | 0.9983             | 33.4829         | -726.5503      | -252.5022    | -2.7689         | -2.9530       |
+| 0.0006        | 2.21  | 6500 | 0.0050          | 1.2958         | -32.8746         | 0.9992             | 34.1704         | -730.9218      | -249.9990    | -2.7450         | -2.9272       |
+| 0.0011        | 2.24  | 6600 | 0.0035          | 1.8455         | -29.7702         | 0.9992             | 31.6158         | -699.8776      | -244.5013    | -2.7935         | -2.9672       |
+| 0.0001        | 2.28  | 6700 | 0.0040          | 2.0206         | -29.9992         | 0.9992             | 32.0197         | -702.1669      | -242.7507    | -2.8074         | -2.9801       |
+| 0.0002        | 2.31  | 6800 | 0.0042          | 1.5238         | -33.6284         | 0.9992             | 35.1522         | -738.4594      | -247.7182    | -2.7943         | -2.9749       |
+| 0.0246        | 2.35  | 6900 | 0.0039          | 0.7561         | -35.1964         | 0.9992             | 35.9525         | -754.1393      | -255.3954    | -2.7779         | -2.9606       |
+| 0.0011        | 2.38  | 7000 | 0.0038          | 1.1395         | -31.9450         | 0.9992             | 33.0845         | -721.6255      | -251.5618    | -2.7762         | -2.9534       |
+| 0.0001        | 2.41  | 7100 | 0.0040          | 1.2345         | -34.3482         | 0.9992             | 35.5827         | -745.6570      | -250.6111    | -2.7624         | -2.9515       |
+| 0.0002        | 2.45  | 7200 | 0.0034          | 1.6020         | -32.1494         | 0.9992             | 33.7514         | -723.6697      | -246.9365    | -2.7747         | -2.9635       |
+| 0.0046        | 2.48  | 7300 | 0.0036          | 1.6314         | -32.1292         | 0.9992             | 33.7605         | -723.4673      | -246.6430    | -2.7679         | -2.9556       |
+| 0.0023        | 2.52  | 7400 | 0.0035          | 1.4778         | -33.5251         | 0.9992             | 35.0028         | -737.4260      | -248.1789    | -2.7722         | -2.9629       |
+| 0.0011        | 2.55  | 7500 | 0.0034          | 1.6981         | -32.6639         | 0.9992             | 34.3620         | -728.8140      | -245.9752    | -2.7916         | -2.9806       |
+| 0.0012        | 2.58  | 7600 | 0.0032          | 1.7076         | -32.6830         | 0.9992             | 34.3906         | -729.0056      | -245.8805    | -2.7888         | -2.9758       |
+| 0.0001        | 2.62  | 7700 | 0.0034          | 2.0561         | -29.7915         | 0.9992             | 31.8476         | -700.0899      | -242.3954    | -2.7837         | -2.9656       |
+| 0.0           | 2.65  | 7800 | 0.0033          | 2.0375         | -30.3971         | 0.9992             | 32.4345         | -706.1458      | -242.5820    | -2.7782         | -2.9618       |
+| 0.0027        | 2.69  | 7900 | 0.0031          | 1.8698         | -31.1258         | 0.9992             | 32.9955         | -713.4329      | -244.2589    | -2.7837         | -2.9739       |
+| 0.0001        | 2.72  | 8000 | 0.0029          | 1.8124         | -32.0635         | 0.9992             | 33.8759         | -722.8105      | -244.8322    | -2.7619         | -2.9524       |
+| 0.0           | 2.75  | 8100 | 0.0029          | 1.7514         | -32.6143         | 0.9992             | 34.3656         | -728.3180      | -245.4429    | -2.7594         | -2.9517       |
+| 0.0001        | 2.79  | 8200 | 0.0029          | 1.7056         | -33.0849         | 0.9992             | 34.7904         | -733.0240      | -245.9009    | -2.7606         | -2.9530       |
+| 0.0           | 2.82  | 8300 | 0.0030          | 1.6349         | -32.8211         | 0.9992             | 34.4560         | -730.3865      | -246.6072    | -2.7437         | -2.9371       |
+| 0.0001        | 2.86  | 8400 | 0.0029          | 1.5951         | -32.9498         | 0.9992             | 34.5450         | -731.6738      | -247.0051    | -2.7438         | -2.9386       |
+| 0.0001        | 2.89  | 8500 | 0.0029          | 1.5667         | -32.9358         | 0.9992             | 34.5025         | -731.5333      | -247.2896    | -2.7541         | -2.9497       |
+| 0.0029        | 2.92  | 8600 | 0.0029          | 1.4986         | -33.3107         | 0.9992             | 34.8093         | -735.2822      | -247.9706    | -2.7541         | -2.9514       |
+| 0.0013        | 2.96  | 8700 | 0.0029          | 1.4945         | -33.3218         | 0.9992             | 34.8163         | -735.3931      | -248.0111    | -2.7544         | -2.9518       |
+| 0.0004        | 2.99  | 8800 | 0.0029          | 1.4978         | -33.2941         | 0.9992             | 34.7920         | -735.1168      | -247.9782    | -2.7547         | -2.9525       |
+### Framework versions
+- Transformers 4.35.0
+- Pytorch 2.1.1+cu121
+- Datasets 2.14.6
+- Tokenizers 0.14.1

all_results.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+    "epoch": 3.0,
+    "eval_logits/chosen": -2.952404499053955,
+    "eval_logits/rejected": -2.754680871963501,
+    "eval_logps/chosen": -247.9913787841797,
+    "eval_logps/rejected": -735.1937866210938,
+    "eval_loss": 0.002894825069233775,
+    "eval_rewards/accuracies": 0.9991582632064819,
+    "eval_rewards/chosen": 1.4965133666992188,
+    "eval_rewards/margins": 34.79835510253906,
+    "eval_rewards/rejected": -33.301841735839844,
+    "eval_runtime": 463.448,
+    "eval_samples": 9500,
+    "eval_samples_per_second": 20.499,
+    "eval_steps_per_second": 0.641,
+    "train_loss": 0.022219736984536855,
+    "train_runtime": 94567.8662,
+    "train_samples": 188284,
+    "train_samples_per_second": 5.973,
+    "train_steps_per_second": 0.093
+}

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.35.0",
+  "use_cache": false,
+  "vocab_size": 32000
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 3.0,
+    "eval_logits/chosen": -2.952404499053955,
+    "eval_logits/rejected": -2.754680871963501,
+    "eval_logps/chosen": -247.9913787841797,
+    "eval_logps/rejected": -735.1937866210938,
+    "eval_loss": 0.002894825069233775,
+    "eval_rewards/accuracies": 0.9991582632064819,
+    "eval_rewards/chosen": 1.4965133666992188,
+    "eval_rewards/margins": 34.79835510253906,
+    "eval_rewards/rejected": -33.301841735839844,
+    "eval_runtime": 463.448,
+    "eval_samples": 9500,
+    "eval_samples_per_second": 20.499,
+    "eval_steps_per_second": 0.641
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.35.0"
+}

model-00001-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11272dbb21a811c8e89bde03c790699508894ee430109842b23d285e80aa5dab
+size 4943162336

model-00002-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5d26e80ab409046000f65db751441d48357d7cfd55fbd95f2e6abc0708c7df1
+size 4999819336

model-00003-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b5ab7f433bb9c33a2e6e1c577894ad504f22226ef6d471b165ffebafa833d13
+size 4540516344

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,298 @@

+{
+  "metadata": {
+    "total_size": 14483464192
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00003-of-00003.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.norm.weight": "model-00003-of-00003.safetensors"
+  }
+}

runs/Jan15_19-26-58_amaterasu/events.out.tfevents.1705346844.amaterasu.2386471.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb9206745d9f4d8fac6563fa82ad134430ce161bd8fac804162e86d0b47693bd
+size 629479

runs/Jan15_19-26-58_amaterasu/events.out.tfevents.1705441876.amaterasu.2386471.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4acf02daf6e85d9890455f44d6193330e18fb4f5c254d426ac48a88f0dba0d98
+size 828

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 2048,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "train_loss": 0.022219736984536855,
+    "train_runtime": 94567.8662,
+    "train_samples": 188284,
+    "train_samples_per_second": 5.973,
+    "train_steps_per_second": 0.093
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fae8ec6e649455133e345b363337c54277c0e0d693b715fcb134994f90abfda5
+size 5752