yihang7 committed bb83397 (1 parent: 2217700): Model save
README.md ADDED
@@ -0,0 +1,155 @@
---
tags:
- generated_from_trainer
model-index:
- name: llama2-7b-chat-dpo-full-hydrox-safe
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# llama2-7b-chat-dpo-full-hydrox-safe

This model was trained from `meta-llama/Llama-2-7b-chat-hf` (per `config.json` in this commit) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 0.0013
- Rewards/chosen: -0.0939
- Rewards/rejected: -28.6036
- Rewards/accuracies: 0.9992
- Rewards/margins: 28.5097
- Logps/rejected: -700.8997
- Logps/chosen: -219.0951
- Logits/rejected: -0.7196
- Logits/chosen: -0.6433

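As a quick consistency check on the DPO metrics above, the reported reward margin should equal the chosen reward minus the rejected reward:

```python
# DPO reward margin = Rewards/chosen - Rewards/rejected (values from the card above).
rewards_chosen = -0.0939
rewards_rejected = -28.6036
margin = rewards_chosen - rewards_rejected
print(round(margin, 4))  # 28.5097, matching Rewards/margins
```
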
## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-07
- train_batch_size: 8
- eval_batch_size: 4
- seed: 42
- distributed_type: multi-GPU
- num_devices: 8
- total_train_batch_size: 64
- total_eval_batch_size: 32
- optimizer: Adam with betas=(0.9, 0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 3

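The effective batch size follows from the per-device batch size and the device count. A gradient-accumulation factor of 1 is an assumption here, since the card does not list one:

```python
# total_train_batch_size = per-device batch * num_devices * grad accumulation.
# gradient_accumulation_steps = 1 is assumed (not stated in the card).
train_batch_size = 8
num_devices = 8
gradient_accumulation_steps = 1
total_train_batch_size = train_batch_size * num_devices * gradient_accumulation_steps
print(total_train_batch_size)  # 64, matching the value above
```
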
### Training results

| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
| 0.6247 | 0.03 | 100 | 0.6155 | 0.0158 | -0.1451 | 0.8577 | 0.1608 | -416.3145 | -217.9982 | -0.2810 | -0.5546 |
| 0.3738 | 0.07 | 200 | 0.3507 | 0.1699 | -0.8998 | 0.9184 | 1.0697 | -423.8618 | -216.4572 | -0.3464 | -0.5597 |
| 0.2144 | 0.1 | 300 | 0.2152 | 0.3547 | -1.8023 | 0.9394 | 2.1570 | -432.8871 | -214.6091 | -0.3537 | -0.5335 |
| 0.1567 | 0.14 | 400 | 0.1458 | 0.4781 | -2.9967 | 0.9554 | 3.4748 | -444.8309 | -213.3745 | -0.3343 | -0.4947 |
| 0.1121 | 0.17 | 500 | 0.1250 | 0.5327 | -4.1425 | 0.9689 | 4.6752 | -456.2888 | -212.8291 | -0.3020 | -0.4604 |
| 0.1003 | 0.2 | 600 | 0.0926 | 0.3744 | -5.4423 | 0.9697 | 5.8167 | -469.2868 | -214.4121 | -0.3227 | -0.4643 |
| 0.0602 | 0.24 | 700 | 0.0769 | 0.3722 | -6.6327 | 0.9739 | 7.0049 | -481.1906 | -214.4338 | -0.3134 | -0.4460 |
| 0.0584 | 0.27 | 800 | 0.0638 | 0.4037 | -7.6613 | 0.9806 | 8.0650 | -491.4773 | -214.1189 | -0.2857 | -0.4235 |
| 0.0555 | 0.31 | 900 | 0.0557 | 0.4281 | -8.2327 | 0.9848 | 8.6608 | -497.1909 | -213.8745 | -0.2914 | -0.4270 |
| 0.0471 | 0.34 | 1000 | 0.0472 | 0.4046 | -9.7769 | 0.9891 | 10.1814 | -512.6325 | -214.1102 | -0.3491 | -0.4740 |
| 0.0673 | 0.37 | 1100 | 0.0383 | 0.3282 | -11.0251 | 0.9949 | 11.3534 | -525.1152 | -214.8733 | -0.3772 | -0.4955 |
| 0.031 | 0.41 | 1200 | 0.0325 | 0.1923 | -11.8461 | 0.9958 | 12.0384 | -533.3251 | -216.2326 | -0.4115 | -0.5142 |
| 0.0242 | 0.44 | 1300 | 0.0275 | 0.2059 | -12.9425 | 0.9966 | 13.1485 | -544.2894 | -216.0965 | -0.4212 | -0.5150 |
| 0.0143 | 0.48 | 1400 | 0.0215 | 0.0180 | -11.9692 | 0.9958 | 11.9872 | -534.5560 | -217.9758 | -0.5405 | -0.5538 |
| 0.0157 | 0.51 | 1500 | 0.0181 | -0.3479 | -14.0856 | 0.9958 | 13.7377 | -555.7203 | -221.6349 | -0.5292 | -0.5576 |
| 0.0155 | 0.54 | 1600 | 0.0286 | -0.2238 | -13.8665 | 0.9958 | 13.6427 | -553.5294 | -220.3942 | -0.4943 | -0.5256 |
| 0.0148 | 0.58 | 1700 | 0.0251 | -0.2352 | -15.8803 | 0.9975 | 15.6451 | -573.6669 | -220.5081 | -0.4799 | -0.5212 |
| 0.0094 | 0.61 | 1800 | 0.0163 | -0.1817 | -16.7316 | 0.9975 | 16.5499 | -582.1795 | -219.9725 | -0.4976 | -0.5385 |
| 0.0112 | 0.65 | 1900 | 0.0159 | -0.3917 | -18.6440 | 0.9949 | 18.2523 | -601.3036 | -222.0726 | -0.5874 | -0.6174 |
| 0.007 | 0.68 | 2000 | 0.0106 | -0.1240 | -16.5280 | 0.9975 | 16.4040 | -580.1437 | -219.3957 | -0.5555 | -0.5702 |
| 0.0083 | 0.71 | 2100 | 0.0167 | -0.3388 | -18.5238 | 0.9975 | 18.1849 | -600.1016 | -221.5440 | -0.5802 | -0.5848 |
| 0.0058 | 0.75 | 2200 | 0.0166 | 0.1875 | -16.4876 | 0.9975 | 16.6751 | -579.7398 | -216.2812 | -0.5300 | -0.5517 |
| 0.0031 | 0.78 | 2300 | 0.0167 | -0.4853 | -19.1077 | 0.9966 | 18.6224 | -605.9405 | -223.0087 | -0.5945 | -0.5932 |
| 0.0041 | 0.82 | 2400 | 0.0148 | -0.1266 | -19.3544 | 0.9983 | 19.2278 | -608.4083 | -219.4222 | -0.5528 | -0.5695 |
| 0.0129 | 0.85 | 2500 | 0.0277 | -0.6526 | -21.0389 | 0.9983 | 20.3863 | -625.2532 | -224.6820 | -0.6317 | -0.6223 |
| 0.0169 | 0.88 | 2600 | 0.0158 | -0.6507 | -22.0352 | 0.9983 | 21.3845 | -635.2158 | -224.6625 | -0.6147 | -0.6148 |
| 0.005 | 0.92 | 2700 | 0.0148 | -0.7455 | -22.5637 | 0.9983 | 21.8181 | -640.5008 | -225.6113 | -0.6401 | -0.6379 |
| 0.0075 | 0.95 | 2800 | 0.0429 | -0.6179 | -21.7587 | 0.9983 | 21.1408 | -632.4512 | -224.3349 | -0.6580 | -0.6338 |
| 0.0053 | 0.99 | 2900 | 0.0452 | -0.3093 | -21.3611 | 0.9983 | 21.0518 | -628.4748 | -221.2488 | -0.6473 | -0.6362 |
| 0.0033 | 1.02 | 3000 | 0.0399 | -0.4299 | -20.6185 | 0.9992 | 20.1886 | -621.0488 | -222.4544 | -0.6812 | -0.6500 |
| 0.1239 | 1.05 | 3100 | 0.0098 | -0.4156 | -21.6528 | 0.9992 | 21.2371 | -631.3915 | -222.3120 | -0.6612 | -0.6328 |
| 0.0029 | 1.09 | 3200 | 0.0041 | -0.4823 | -24.1370 | 0.9992 | 23.6547 | -656.2342 | -222.9791 | -0.6460 | -0.6310 |
| 0.0015 | 1.12 | 3300 | 0.0037 | -0.6250 | -25.4442 | 0.9975 | 24.8192 | -669.3063 | -224.4059 | -0.6623 | -0.6482 |
| 0.002 | 1.16 | 3400 | 0.0039 | -0.1881 | -23.5637 | 0.9983 | 23.3756 | -650.5010 | -220.0367 | -0.6331 | -0.6142 |
| 0.0027 | 1.19 | 3500 | 0.0039 | -0.3251 | -24.0619 | 0.9992 | 23.7368 | -655.4830 | -221.4067 | -0.6644 | -0.6402 |
| 0.0015 | 1.22 | 3600 | 0.0031 | -0.4337 | -26.8013 | 0.9983 | 26.3676 | -682.8770 | -222.4931 | -0.6421 | -0.6330 |
| 0.0067 | 1.26 | 3700 | 0.0030 | -0.1107 | -22.8513 | 0.9992 | 22.7406 | -643.3767 | -219.2624 | -0.6412 | -0.6162 |
| 0.002 | 1.29 | 3800 | 0.0029 | -0.4330 | -24.7254 | 0.9992 | 24.2925 | -662.1182 | -222.4855 | -0.6750 | -0.6447 |
| 0.004 | 1.33 | 3900 | 0.0026 | -0.5258 | -25.6407 | 0.9992 | 25.1150 | -671.2714 | -223.4133 | -0.6613 | -0.6319 |
| 0.0032 | 1.36 | 4000 | 0.0025 | -0.8592 | -27.4389 | 0.9975 | 26.5797 | -689.2528 | -226.7478 | -0.6796 | -0.6569 |
| 0.0293 | 1.39 | 4100 | 0.0032 | -0.6286 | -26.4388 | 0.9992 | 25.8102 | -679.2518 | -224.4421 | -0.6657 | -0.6341 |
| 0.002 | 1.43 | 4200 | 0.0026 | -0.6449 | -26.1156 | 0.9992 | 25.4707 | -676.0200 | -224.6045 | -0.6907 | -0.6546 |
| 0.0007 | 1.46 | 4300 | 0.0026 | -0.4135 | -25.3743 | 0.9992 | 24.9609 | -668.6074 | -222.2907 | -0.6704 | -0.6348 |
| 0.001 | 1.5 | 4400 | 0.0025 | -0.1706 | -25.4135 | 0.9992 | 25.2428 | -668.9984 | -219.8623 | -0.6670 | -0.6312 |
| 0.0018 | 1.53 | 4500 | 0.0026 | -0.3368 | -23.9768 | 0.9992 | 23.6400 | -654.6318 | -221.5240 | -0.6866 | -0.6345 |
| 0.0035 | 1.56 | 4600 | 0.0025 | 0.0146 | -23.9455 | 0.9992 | 23.9602 | -654.3195 | -218.0095 | -0.6725 | -0.6253 |
| 0.003 | 1.6 | 4700 | 0.0024 | 0.0616 | -23.3292 | 0.9992 | 23.3908 | -648.1558 | -217.5395 | -0.6644 | -0.6168 |
| 0.0028 | 1.63 | 4800 | 0.0026 | -0.5134 | -26.9070 | 0.9992 | 26.3937 | -683.9343 | -223.2894 | -0.7161 | -0.6634 |
| 0.0047 | 1.67 | 4900 | 0.0025 | -0.0916 | -24.8206 | 0.9992 | 24.7290 | -663.0701 | -219.0718 | -0.6444 | -0.6038 |
| 0.0003 | 1.7 | 5000 | 0.0025 | 0.1584 | -23.8425 | 0.9992 | 24.0009 | -653.2887 | -216.5716 | -0.6169 | -0.5785 |
| 0.0074 | 1.73 | 5100 | 0.0026 | 0.4581 | -22.1966 | 0.9992 | 22.6546 | -636.8298 | -213.5752 | -0.6477 | -0.5976 |
| 0.002 | 1.77 | 5200 | 0.0023 | 0.1663 | -23.7774 | 0.9983 | 23.9437 | -652.6381 | -216.4931 | -0.6778 | -0.6312 |
| 0.0005 | 1.8 | 5300 | 0.0021 | 0.0885 | -24.4639 | 0.9983 | 24.5525 | -659.5032 | -217.2705 | -0.6907 | -0.6445 |
| 0.0009 | 1.84 | 5400 | 0.0020 | 0.3259 | -23.8153 | 0.9983 | 24.1412 | -653.0168 | -214.8967 | -0.6674 | -0.6177 |
| 0.0004 | 1.87 | 5500 | 0.0027 | 0.0547 | -25.4516 | 0.9992 | 25.5063 | -669.3798 | -217.6091 | -0.7239 | -0.6630 |
| 0.0078 | 1.9 | 5600 | 0.0027 | -0.2841 | -27.2416 | 0.9992 | 26.9575 | -687.2796 | -220.9968 | -0.7328 | -0.6718 |
| 0.0053 | 1.94 | 5700 | 0.0031 | 0.3394 | -23.3205 | 0.9992 | 23.6599 | -648.0685 | -214.7619 | -0.7018 | -0.6326 |
| 0.0028 | 1.97 | 5800 | 0.0022 | 0.3456 | -23.6389 | 0.9992 | 23.9845 | -651.2528 | -214.7000 | -0.6865 | -0.6247 |
| 0.0003 | 2.01 | 5900 | 0.0022 | 0.0137 | -25.1376 | 0.9992 | 25.1513 | -666.2399 | -218.0188 | -0.7179 | -0.6544 |
| 0.0003 | 2.04 | 6000 | 0.0022 | -0.0273 | -25.5899 | 0.9992 | 25.5627 | -670.7634 | -218.4287 | -0.7175 | -0.6559 |
| 0.0005 | 2.07 | 6100 | 0.0021 | -0.0506 | -26.3022 | 0.9992 | 26.2516 | -677.8860 | -218.6621 | -0.7035 | -0.6425 |
| 0.0033 | 2.11 | 6200 | 0.0020 | -0.1977 | -27.1231 | 0.9992 | 26.9254 | -686.0947 | -220.1329 | -0.6936 | -0.6406 |
| 0.0048 | 2.14 | 6300 | 0.0018 | 0.1836 | -25.2467 | 0.9992 | 25.4303 | -667.3306 | -216.3201 | -0.6888 | -0.6298 |
| 0.0007 | 2.18 | 6400 | 0.0018 | -0.0446 | -26.3563 | 0.9992 | 26.3117 | -678.4270 | -218.6022 | -0.7075 | -0.6494 |
| 0.0003 | 2.21 | 6500 | 0.0018 | -0.1020 | -27.0392 | 0.9992 | 26.9372 | -685.2560 | -219.1755 | -0.7007 | -0.6418 |
| 0.0013 | 2.24 | 6600 | 0.0017 | -0.0434 | -26.1507 | 0.9992 | 26.1073 | -676.3707 | -218.5897 | -0.7076 | -0.6401 |
| 0.0002 | 2.28 | 6700 | 0.0018 | 0.1488 | -25.4695 | 0.9992 | 25.6182 | -669.5585 | -216.6682 | -0.6911 | -0.6184 |
| 0.0003 | 2.31 | 6800 | 0.0018 | -0.0762 | -26.7830 | 0.9992 | 26.7068 | -682.6938 | -218.9181 | -0.7238 | -0.6530 |
| 0.0095 | 2.35 | 6900 | 0.0018 | -0.2520 | -27.9261 | 0.9992 | 27.6741 | -694.1253 | -220.6760 | -0.7267 | -0.6572 |
| 0.0012 | 2.38 | 7000 | 0.0017 | -0.1979 | -27.7144 | 0.9992 | 27.5165 | -692.0080 | -220.1350 | -0.7207 | -0.6516 |
| 0.0004 | 2.41 | 7100 | 0.0017 | -0.2063 | -28.2831 | 0.9992 | 28.0768 | -697.6947 | -220.2186 | -0.7147 | -0.6448 |
| 0.0002 | 2.45 | 7200 | 0.0017 | -0.2423 | -28.5426 | 0.9992 | 28.3004 | -700.2905 | -220.5785 | -0.7291 | -0.6572 |
| 0.0049 | 2.48 | 7300 | 0.0017 | -0.0938 | -27.3084 | 0.9992 | 27.2146 | -687.9479 | -219.0937 | -0.7313 | -0.6487 |
| 0.0024 | 2.52 | 7400 | 0.0016 | -0.0596 | -27.3730 | 0.9992 | 27.3134 | -688.5939 | -218.7520 | -0.7289 | -0.6467 |
| 0.0013 | 2.55 | 7500 | 0.0016 | 0.0102 | -27.3445 | 0.9992 | 27.3547 | -688.3093 | -218.0539 | -0.7271 | -0.6462 |
| 0.0014 | 2.58 | 7600 | 0.0016 | -0.1696 | -28.7332 | 0.9992 | 28.5636 | -702.1956 | -219.8516 | -0.7393 | -0.6604 |
| 0.0002 | 2.62 | 7700 | 0.0015 | -0.1083 | -28.2952 | 0.9992 | 28.1869 | -697.8158 | -219.2384 | -0.7264 | -0.6502 |
| 0.0001 | 2.65 | 7800 | 0.0015 | -0.0892 | -28.2958 | 0.9992 | 28.2066 | -697.8219 | -219.0480 | -0.7246 | -0.6479 |
| 0.0025 | 2.69 | 7900 | 0.0015 | -0.1066 | -28.4335 | 0.9992 | 28.3270 | -699.1990 | -219.2214 | -0.7196 | -0.6447 |
| 0.0002 | 2.72 | 8000 | 0.0015 | -0.1453 | -28.8184 | 0.9992 | 28.6731 | -703.0482 | -219.6090 | -0.7264 | -0.6518 |
| 0.0002 | 2.75 | 8100 | 0.0015 | -0.1058 | -28.6632 | 0.9992 | 28.5575 | -701.4964 | -219.2135 | -0.7190 | -0.6438 |
| 0.0003 | 2.79 | 8200 | 0.0015 | -0.1407 | -28.8865 | 0.9992 | 28.7459 | -703.7291 | -219.5624 | -0.7227 | -0.6488 |
| 0.0002 | 2.82 | 8300 | 0.0014 | -0.1528 | -28.9232 | 0.9992 | 28.7704 | -704.0963 | -219.6839 | -0.7272 | -0.6534 |
| 0.0001 | 2.86 | 8400 | 0.0013 | -0.1196 | -28.7573 | 0.9992 | 28.6377 | -702.4371 | -219.3522 | -0.7244 | -0.6492 |
| 0.0003 | 2.89 | 8500 | 0.0013 | -0.1542 | -28.9522 | 0.9992 | 28.7980 | -704.3861 | -219.6977 | -0.7276 | -0.6518 |
| 0.0016 | 2.92 | 8600 | 0.0013 | -0.0885 | -28.6082 | 0.9992 | 28.5197 | -700.9456 | -219.0408 | -0.7181 | -0.6426 |
| 0.0014 | 2.96 | 8700 | 0.0013 | -0.0904 | -28.5887 | 0.9992 | 28.4983 | -700.7510 | -219.0594 | -0.7190 | -0.6429 |
| 0.0005 | 2.99 | 8800 | 0.0013 | -0.0898 | -28.5857 | 0.9992 | 28.4959 | -700.7208 | -219.0538 | -0.7194 | -0.6430 |

### Framework versions

- Transformers 4.35.0
- Pytorch 2.1.1+cu121
- Datasets 2.14.6
- Tokenizers 0.14.1
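The step count in the results table can be cross-checked against the training setup: with 188,284 training samples (reported in all_results.json in this commit) and an effective batch size of 64, three epochs come to roughly 8,826 optimizer steps, consistent with the table's last evaluation near step 8,800 at epoch 2.99:

```python
import math

# Steps implied by the setup: 188284 samples (all_results.json),
# effective batch size 64 (from the hyperparameters above), 3 epochs.
train_samples = 188284
total_train_batch_size = 64
num_epochs = 3
steps_per_epoch = math.ceil(train_samples / total_train_batch_size)
print(steps_per_epoch, steps_per_epoch * num_epochs)  # 2942 8826
```
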
all_results.json ADDED
@@ -0,0 +1,21 @@
{
    "epoch": 3.0,
    "eval_logits/chosen": -0.6432551741600037,
    "eval_logits/rejected": -0.7195827960968018,
    "eval_logps/chosen": -219.0950927734375,
    "eval_logps/rejected": -700.8997192382812,
    "eval_loss": 0.0013098448980599642,
    "eval_rewards/accuracies": 0.9991582632064819,
    "eval_rewards/chosen": -0.09392862766981125,
    "eval_rewards/margins": 28.50965118408203,
    "eval_rewards/rejected": -28.603578567504883,
    "eval_runtime": 539.0442,
    "eval_samples": 9500,
    "eval_samples_per_second": 17.624,
    "eval_steps_per_second": 0.551,
    "train_loss": 0.029375145110450352,
    "train_runtime": 110770.2779,
    "train_samples": 188284,
    "train_samples_per_second": 5.099,
    "train_steps_per_second": 0.08
}
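The throughput figures above are internally consistent: train_samples_per_second equals (train_samples × epochs) / train_runtime:

```python
# Cross-check reported training throughput in all_results.json.
train_samples = 188284
epochs = 3.0
train_runtime = 110770.2779  # seconds
print(round(train_samples * epochs / train_runtime, 3))  # 5.099, as reported
```
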
config.json ADDED
@@ -0,0 +1,27 @@
{
    "_name_or_path": "/media/d1/huggingface.co/models/meta-llama/Llama-2-7b-chat-hf",
    "architectures": [
        "LlamaForCausalLM"
    ],
    "attention_bias": false,
    "bos_token_id": 1,
    "eos_token_id": 2,
    "hidden_act": "silu",
    "hidden_size": 4096,
    "initializer_range": 0.02,
    "intermediate_size": 11008,
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "num_attention_heads": 32,
    "num_hidden_layers": 32,
    "num_key_value_heads": 32,
    "pretraining_tp": 1,
    "rms_norm_eps": 1e-05,
    "rope_scaling": null,
    "rope_theta": 10000.0,
    "tie_word_embeddings": false,
    "torch_dtype": "bfloat16",
    "transformers_version": "4.35.0",
    "use_cache": false,
    "vocab_size": 32000
}
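These dimensions pin down the parameter count. A quick tally, assuming the standard Llama layout (untied embeddings, SwiGLU MLP, two RMSNorms per layer), gives ~6.74B parameters, which at 2 bytes each in bfloat16 matches the 13,476,831,232-byte total_size recorded in model.safetensors.index.json:

```python
# Parameter count implied by config.json (standard Llama-2 7B layout assumed).
hidden, inter, layers, vocab = 4096, 11008, 32, 32000
attn = 4 * hidden * hidden                 # q, k, v, o projections
mlp = 3 * hidden * inter                   # gate, up, down projections
norms = 2 * hidden                         # two RMSNorms per layer
per_layer = attn + mlp + norms
total = layers * per_layer + 2 * vocab * hidden + hidden  # + embed, lm_head, final norm
print(total, total * 2)  # 6738415616 13476831232 (bytes in bfloat16)
```
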
eval_results.json ADDED
@@ -0,0 +1,16 @@
{
    "epoch": 3.0,
    "eval_logits/chosen": -0.6432551741600037,
    "eval_logits/rejected": -0.7195827960968018,
    "eval_logps/chosen": -219.0950927734375,
    "eval_logps/rejected": -700.8997192382812,
    "eval_loss": 0.0013098448980599642,
    "eval_rewards/accuracies": 0.9991582632064819,
    "eval_rewards/chosen": -0.09392862766981125,
    "eval_rewards/margins": 28.50965118408203,
    "eval_rewards/rejected": -28.603578567504883,
    "eval_runtime": 539.0442,
    "eval_samples": 9500,
    "eval_samples_per_second": 17.624,
    "eval_steps_per_second": 0.551
}
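The evaluation throughput also checks out: 9,500 eval samples over 539.0 s gives the reported 17.624 samples/s, and with a total eval batch size of 32 (from the model card), ceil(9500 / 32) = 297 batches over the same runtime gives 0.551 steps/s:

```python
import math

# Cross-check eval throughput in eval_results.json (total_eval_batch_size = 32).
eval_samples, eval_runtime = 9500, 539.0442
print(round(eval_samples / eval_runtime, 3))       # 17.624 samples/s
steps = math.ceil(eval_samples / 32)
print(steps, round(steps / eval_runtime, 3))       # 297 0.551
```
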
generation_config.json ADDED
@@ -0,0 +1,10 @@
{
    "bos_token_id": 1,
    "do_sample": true,
    "eos_token_id": 2,
    "max_length": 4096,
    "pad_token_id": 0,
    "temperature": 0.6,
    "top_p": 0.9,
    "transformers_version": "4.35.0"
}
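These sampling defaults map directly onto `generate()` keyword arguments. A minimal sketch of the same settings as a plain dict (transformers picks up generation_config.json automatically, so setting them by hand is optional):

```python
# generation_config.json defaults expressed as generate() kwargs (sketch).
generation_defaults = {
    "do_sample": True,
    "temperature": 0.6,
    "top_p": 0.9,
    "max_length": 4096,
    "bos_token_id": 1,
    "eos_token_id": 2,
    "pad_token_id": 0,
}
# e.g. model.generate(**inputs, **generation_defaults)
print(generation_defaults["temperature"], generation_defaults["top_p"])  # 0.6 0.9
```
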
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:652fc7bd7091184305e4dfd2e8e30a1178bf5ef7190e75fb91a3370e0e4f7603
size 4938985352
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6cd74a276ea84263a4753a44473bbbe5ed196bff538f69cf8016c6f64fe4712b
size 4947390880
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c03af98920dcd4aadfa44c03ade096c698b9377d41e2bc138e783501e76c8aef
size 3590488816
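The three shard sizes sum to slightly more than the 13,476,831,232 bytes of tensor data recorded as total_size in model.safetensors.index.json, since each safetensors shard also carries a small JSON header:

```python
# Sum of the three LFS shard sizes above (bytes).
shards = [4938985352, 4947390880, 3590488816]
total = sum(shards)
print(total)                # 13476865048
print(total - 13476831232)  # 33816 bytes of per-shard safetensors headers
```
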
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 13476831232
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.norm.weight": "model-00003-of-00003.safetensors"
+ }
+ }
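
The `weight_map` above is what lets loaders resolve each parameter name to the shard file that stores it. As an illustrative sketch (not the actual loading code — in practice `transformers` reads this index automatically when loading a sharded checkpoint), parameters can be grouped by shard like this:

```python
from collections import defaultdict

def params_per_shard(weight_map):
    """Group parameter names by the shard file that stores them."""
    shards = defaultdict(list)
    for param_name, shard_file in weight_map.items():
        shards[shard_file].append(param_name)
    return dict(shards)

# A tiny excerpt of the index above:
weight_map = {
    "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.norm.weight": "model-00003-of-00003.safetensors",
}
```

A loader would then open each shard once and read only the tensors listed for it.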
runs/Jan14_05-17-19_amaterasu/events.out.tfevents.1705209484.amaterasu.1284454.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6fc1dfbfe6dc44b4da406e5f961123b274f4ab59bd640ccd87e3b0964df1b1e
+ size 629555
runs/Jan14_05-17-19_amaterasu/events.out.tfevents.1705320794.amaterasu.1284454.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b99fffc0eeb42f5f722641295159ff60f1901cfac06d303d2d85c673dd00f53
+ size 828
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": false,
+ "model_max_length": 2048,
+ "pad_token": "</s>",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
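
The `chat_template` above is the standard Llama-2 chat format expressed in Jinja. As a rough, illustration-only sketch of the same logic in plain Python (in practice you would call `tokenizer.apply_chat_template`, which renders this template directly):

```python
# Re-implementation of the Llama-2 chat template from tokenizer_config.json,
# for illustration only; `transformers` renders the Jinja template itself.
BOS, EOS = "<s>", "</s>"

def format_llama2_chat(messages):
    """Render a list of {'role': ..., 'content': ...} dicts into a prompt string."""
    # An optional leading system message is folded into the first user turn.
    if messages and messages[0]["role"] == "system":
        system, rest = messages[0]["content"], messages[1:]
    else:
        system, rest = None, messages
    out = []
    for i, msg in enumerate(rest):
        # Roles must strictly alternate user/assistant/user/...
        if msg["role"] != ("user" if i % 2 == 0 else "assistant"):
            raise ValueError("Conversation roles must alternate user/assistant/...")
        content = msg["content"]
        if i == 0 and system is not None:
            content = f"<<SYS>>\n{system}\n<</SYS>>\n\n{content}"
        if msg["role"] == "user":
            out.append(f"{BOS}[INST] {content.strip()} [/INST]")
        else:
            out.append(f" {content.strip()} {EOS}")
    return "".join(out)
```

Each user turn is wrapped in `<s>[INST] ... [/INST]` and each assistant turn is closed with `</s>`, matching the template's branches.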
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 0.029375145110450352,
+ "train_runtime": 110770.2779,
+ "train_samples": 188284,
+ "train_samples_per_second": 5.099,
+ "train_steps_per_second": 0.08
+ }
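
As a quick sanity check, `train_samples_per_second` is consistent with the other fields in `train_results.json`: with 188,284 samples repeated over 3 epochs in roughly 110,770 seconds, the throughput works out to about 5.099 samples/second.

```python
# Consistency check on the train_results.json values above.
train_samples = 188284
epochs = 3.0
train_runtime = 110770.2779  # seconds

# Total samples processed divided by wall-clock time.
samples_per_second = train_samples * epochs / train_runtime
```

(Similarly, dividing by the total train batch size of 64 from the hyperparameters gives the reported ~0.08 steps/second.)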
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53e4faf8feeb2652608b9454c8ed733658b81937eb366bd52ed00cdfc4d717d4
+ size 5752