Update README.md
Browse files
README.md
CHANGED
@@ -38,3 +38,41 @@ Notes:
- The `tokenize_chinese_chars` parameter indicates special handling for Chinese characters.
- The maximum model length is set to 512 tokens.
## Run the model

```Python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "0xnu/AGTD-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

# Input text
text = "This model trains on a diverse dataset and serves functions in applications requiring a mechanism for distinguishing between human and AI-generated text."

# Preprocess the text
inputs = tokenizer(text, return_tensors='pt')

# Run the model
outputs = model(**inputs)

# Interpret the output
logits = outputs.logits

# Apply softmax to convert logits to probabilities
probabilities = torch.softmax(logits, dim=1)

# Assuming the first class is 'human' and the second class is 'ai'
human_prob, ai_prob = probabilities.detach().numpy()[0]

# Print probabilities
print(f"Human Probability: {human_prob:.4f}")
print(f"AI Probability: {ai_prob:.4f}")

# Determine if the text is human or AI-generated
if human_prob > ai_prob:
    print("The text is likely human-generated.")
else:
    print("The text is likely AI-generated.")
```