safaricd committed
Commit 40a35b0 • 1 Parent(s): 81b8776

Training in progress, epoch 1

adapter_config.json CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "down_proj",
+    "q_proj",
     "up_proj",
-    "o_proj",
     "k_proj",
     "gate_proj",
-    "q_proj",
-    "down_proj",
-    "v_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e92f82de5230f0325f712593d34e359445f49e7d9d62ef3fb29ecf1bbc991ad2
-size 1803907984
+oid sha256:7d5dfcb47d2492bcd381c74fff215f0c601db4a68fcf916e38070d7d97898d38
+size 1803940752
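The adapter grows by exactly 32,768 bytes, which lines up with the two new vocabulary rows added below (ids 32018 and 32019). A hedged back-of-the-envelope check, assuming a 4096-dim CodeLlama-style hidden size and embedding weights saved alongside the adapter:

```python
# Hedged arithmetic: the delta equals 2 new tokens x 4096 dims x 4 bytes,
# e.g. one resized fp32 matrix, or two bf16 matrices (embed_tokens + lm_head).
delta = 1_803_940_752 - 1_803_907_984
assert delta == 32_768 == 2 * 4096 * 4
```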
added_tokens.json CHANGED
@@ -1,4 +1,6 @@
 {
-  "<|im_end|>": 32016,
-  "<|im_start|>": 32017
+  "DIALECT": 32018,
+  "[/DIALECT]": 32017,
+  "[PAD]": 32019,
+  "[SQL]": 32016
 }
special_tokens_map.json CHANGED
@@ -1,28 +1,35 @@
 {
   "additional_special_tokens": [
     {
-      "content": "<|im_end|>",
+      "content": "[SQL]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "<|im_start|>",
+      "content": "[/DIALECT]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "DIALECT",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     }
   ],
-  "bos_token": "<|im_start|>",
-  "eos_token": "<|im_end|>",
-  "pad_token": "<|im_end|>",
-  "unk_token": {
-    "content": "<unk>",
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": {
+    "content": "[PAD]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
-  }
+  },
+  "unk_token": "<unk>"
 }
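The ChatML markers are replaced by SQL/dialect task tokens, a dedicated [PAD] token, and the stock Llama <s>/</s> BOS/EOS. A sketch of how files like these are typically produced; the base checkpoint is an assumption (a CodeLlama variant, suggested by the <FILL_ME>/▁<PRE> infill tokens later in this commit):

```python
# Sketch under assumptions: "codellama/CodeLlama-7b-hf" stands in for the
# actual base checkpoint, which this commit does not name.
from transformers import AutoModelForCausalLM, AutoTokenizer

base = "codellama/CodeLlama-7b-hf"  # assumed base repo id
tok = AutoTokenizer.from_pretrained(base)
tok.add_special_tokens({
    "additional_special_tokens": ["[SQL]", "[/DIALECT]", "DIALECT"],
    "pad_token": "[PAD]",
})  # on a 32016-token CodeLlama vocab this yields ids 32016-32019

model = AutoModelForCausalLM.from_pretrained(base)
model.resize_token_embeddings(len(tok))  # make room for the new rows
tok.save_pretrained("out")  # writes added_tokens.json + special_tokens_map.json
```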
tmpdr2e1dme/_remote_module_non_scriptable.py ADDED
@@ -0,0 +1,81 @@
+from typing import *
+
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+
+
+module_interface_cls = None
+
+
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+
+
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+
+
+_generated_methods = [
+    forward_async,
+    forward,
+]
+
+
+
+
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret
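This file is not project code: it is the template that torch.distributed.nn instantiates (into a temp directory, hence the tmpdr2e1dme/ prefix) whenever a non-scriptable RemoteModule is constructed, and it looks to have been swept into the commit accidentally. For context only, a minimal sketch of the call that triggers its generation, assuming an RPC group with a peer named "worker1":

```python
# Context-only sketch; requires torch.distributed.rpc to be initialized with
# a peer "worker1" before RemoteModule can be constructed.
import torch.nn as nn
from torch.distributed.nn import RemoteModule

remote_linear = RemoteModule("worker1/cpu", nn.Linear, args=(4096, 4096))
# .forward(...) / .forward_async(...) are exactly the generated methods above
```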
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 3072,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {
@@ -68,7 +73,7 @@
     },
     {
       "id": 32016,
-      "content": "<|im_end|>",
+      "content": "[SQL]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -77,7 +82,25 @@
     },
     {
       "id": 32017,
-      "content": "<|im_start|>",
+      "content": "[/DIALECT]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 32018,
+      "content": "DIALECT",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 32019,
+      "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
tokenizer_config.json CHANGED
@@ -59,7 +59,7 @@
     "special": true
   },
   "32016": {
-    "content": "<|im_end|>",
+    "content": "[SQL]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
@@ -67,7 +67,23 @@
     "special": true
   },
   "32017": {
-    "content": "<|im_start|>",
+    "content": "[/DIALECT]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false,
+    "special": true
+  },
+  "32018": {
+    "content": "DIALECT",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false,
+    "special": true
+  },
+  "32019": {
+    "content": "[PAD]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
@@ -76,19 +92,19 @@
     }
   },
   "additional_special_tokens": [
-    "<|im_end|>",
-    "<|im_start|>"
+    "[SQL]",
+    "[/DIALECT]",
+    "DIALECT"
   ],
-  "bos_token": "<|im_start|>",
-  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
+  "eos_token": "</s>",
   "eot_token": "▁<EOT>",
   "fill_token": "<FILL_ME>",
   "legacy": null,
   "middle_token": "▁<MID>",
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<|im_end|>",
+  "pad_token": "[PAD]",
   "prefix_token": "▁<PRE>",
   "sp_model_kwargs": {},
   "suffix_token": "▁<SUF>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4bafe043d2711fd6e68d65399d3d57e5bfff724c77431558500c810926dc46ea
+oid sha256:42e79e5a8a5acfb7f938b2a7d99d219ffaf4aa5de423917c0649be9238a74814
 size 4664