asofter committed on
Commit
21ab967
1 Parent(s): 1577cef

ONNX-converted version of the model

Browse files

We decided to swap the existing model for the [Code Scanner](https://llm-guard.com/input_scanners/code/) in [llm-guard](https://github.com/laiyer-ai/llm-guard) with your model. Our tests show much better accuracy compared to the existing HuggingFace one.

To achieve faster inference, we use ONNX models converted with HuggingFace's Optimum library.

Example of the repo with ONNX built-in: https://huggingface.co/laiyer/deberta-v3-base-prompt-injection

onnx/config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "philomath-1209/programming-language-identification",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "Scala",
15
+ "1": "JavaScript",
16
+ "2": "COBOL",
17
+ "3": "ARM Assembly",
18
+ "4": "R",
19
+ "5": "Lua",
20
+ "6": "C++",
21
+ "7": "Visual Basic .NET",
22
+ "8": "Go",
23
+ "9": "Erlang",
24
+ "10": "C#",
25
+ "11": "Rust",
26
+ "12": "Ruby",
27
+ "13": "Swift",
28
+ "14": "Mathematica/Wolfram Language",
29
+ "15": "PHP",
30
+ "16": "Fortran",
31
+ "17": "AppleScript",
32
+ "18": "Pascal",
33
+ "19": "Java",
34
+ "20": "PowerShell",
35
+ "21": "Python",
36
+ "22": "C",
37
+ "23": "Perl",
38
+ "24": "Kotlin",
39
+ "25": "jq"
40
+ },
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 3072,
43
+ "label2id": {
44
+ "ARM Assembly": 3,
45
+ "AppleScript": 17,
46
+ "C": 22,
47
+ "C#": 10,
48
+ "C++": 6,
49
+ "COBOL": 2,
50
+ "Erlang": 9,
51
+ "Fortran": 16,
52
+ "Go": 8,
53
+ "Java": 19,
54
+ "JavaScript": 1,
55
+ "Kotlin": 24,
56
+ "Lua": 5,
57
+ "Mathematica/Wolfram Language": 14,
58
+ "PHP": 15,
59
+ "Pascal": 18,
60
+ "Perl": 23,
61
+ "PowerShell": 20,
62
+ "Python": 21,
63
+ "R": 4,
64
+ "Ruby": 12,
65
+ "Rust": 11,
66
+ "Scala": 0,
67
+ "Swift": 13,
68
+ "Visual Basic .NET": 7,
69
+ "jq": 25
70
+ },
71
+ "layer_norm_eps": 1e-05,
72
+ "max_position_embeddings": 514,
73
+ "model_type": "roberta",
74
+ "num_attention_heads": 12,
75
+ "num_hidden_layers": 6,
76
+ "pad_token_id": 1,
77
+ "position_embedding_type": "absolute",
78
+ "problem_type": "single_label_classification",
79
+ "torch_dtype": "float32",
80
+ "transformers_version": "4.36.2",
81
+ "type_vocab_size": 1,
82
+ "use_cache": true,
83
+ "vocab_size": 52000
84
+ }
onnx/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d9bf25fe9933c20db353c90fd5295554d4c02f06f6f052d8ec1ef87c623e0bf
3
+ size 334030262
onnx/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
onnx/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
onnx/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "max_len": 512,
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
onnx/vocab.json ADDED
The diff for this file is too large to render. See raw diff