from transformers import GPT2ForSequenceClassification, GPT2Tokenizer
from datasets import load_dataset
from transformers import pipeline
import pandas as pd
# Usage example: load the fine-tuned GPT-2 sequence classifier from the
# Hugging Face Hub and run it on a short string and on one document.
MODEL_ID = "sartajbhuvaji/gutenberg-gpt2"
# The base model is GPT-2, whose position embeddings cover 1024 tokens.
MAX_TOKENS = 1024

# Load the model and its matching tokenizer from the same repository.
model = GPT2ForSequenceClassification.from_pretrained(MODEL_ID)
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_ID)

# Create a text classification pipeline.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Test the pipeline on a short string.
result = classifier("This is a great book!")
print(result)  # [{'label': 'LABEL_7', 'score': 0.8302432298660278}]

# Test the pipeline on a document.
# NOTE(review): `df` is not defined anywhere in this snippet. It must already
# exist as a pandas DataFrame with 'DocID' and 'Text' columns -- presumably
# built from the training dataset via `load_dataset(...).to_pandas()`; confirm
# the dataset id and add the loading step, otherwise this raises NameError.
doc_id = 1
doc_text = df.loc[df['DocID'] == doc_id, 'Text'].values[0]

# The character slice is only a cheap pre-trim so a whole book is not
# tokenized. It does NOT bound the token count: byte-level BPE can emit
# several tokens per non-ASCII character. Tokenizer-level truncation is what
# actually keeps the input within the model's context window.
result = classifier(doc_text[:1024], truncation=True, max_length=MAX_TOKENS)
print(result)  # [{'label': 'LABEL_4', 'score': 0.6285566091537476}]
Downloads last month: 5
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.
Model tree for sartajbhuvaji/gutenberg-gpt2
Base model: openai-community/gpt2