Marko Tasic committed on
Commit
bf6bb58
1 Parent(s): 8c83e49
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:917083108aebdf999483d240272412d42a8cd47ebab1059ab8bd96ff10d2b002
3
+ size 66562045
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e0d90b050e8b65afa291dca4f06938140338f35d30aeac77d96838efa33c7d
3
+ size 66551672
out/pretrain/final/evaluate/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e73e086a5ed14149cee99a1aa3e2563ec7ab536c1653ff332999afa3520694
3
+ size 546
out/pretrain/final/evaluate/model_config.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_logit_softcapping: null
2
+ attention_scores_scalar: null
3
+ bias: false
4
+ block_size: 32768
5
+ final_logit_softcapping: null
6
+ gelu_approximate: none
7
+ head_size: 26
8
+ hf_config: {}
9
+ intermediate_size: 1092
10
+ lm_head_bias: false
11
+ mlp_class_name: LLaMAMLP
12
+ n_embd: 312
13
+ n_expert: 0
14
+ n_expert_per_token: 0
15
+ n_head: 12
16
+ n_layer: 10
17
+ n_query_groups: 4
18
+ name: ''
19
+ norm_class_name: RMSNorm
20
+ norm_eps: 1.0e-05
21
+ padded_vocab_size: 32768
22
+ padding_multiple: 512
23
+ parallel_residual: false
24
+ post_attention_norm: false
25
+ post_mlp_norm: false
26
+ rope_base: 500000
27
+ rope_condense_ratio: 1
28
+ rotary_percentage: 1.0
29
+ scale_embeddings: false
30
+ shared_attention_norm: false
31
+ sliding_window_layer_placing: null
32
+ sliding_window_size: null
33
+ vocab_size: 32768
out/pretrain/final/evaluate/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85703bfe7255763468bcc1a1d2ddb7e13c665cf78f12716ce9177915cec173ef
3
+ size 66570501
out/pretrain/final/evaluate/results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdfcb715382c318246f445c75353b1cc336cfe0e32db3a2d429c4ece6a94f22f
3
+ size 139346753
out/pretrain/final/evaluate/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b496a30dc268bcb8adfd551f693e68e9eadd06b81cab385c088a61e7663649c
3
+ size 1368561
out/pretrain/final/evaluate/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6333d68c3280be6081b795cc160fd5872707562021f9889b2e2bd3ae508fa62
3
+ size 23043