## How to use

### Load model and tokenizer
```python
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
model = AutoModelForTokenClassification.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
```
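If you only need end-to-end predictions, the same checkpoint can also be wrapped in a `transformers` pipeline. This is a minimal sketch, not part of the original card; `aggregation_strategy="simple"` assumes a recent `transformers` version and a fast tokenizer:

```python
from transformers import pipeline

# Sketch: token-classification pipeline over the same checkpoint.
# aggregation_strategy="simple" merges word pieces into whole-word entities.
ner = pipeline(
    "token-classification",
    model="ageng-anugrah/indobert-large-p2-finetuned-ner",
    aggregation_strategy="simple",
)

print(ner("BJ Habibie adalah Presiden Indonesia ke-3"))
```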
### Extract NER tags
```python
import torch

def predict(model, tokenizer, sentence):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # tokenize the pre-split words so offsets map back to whole words
    inputs = tokenizer(sentence.split(),
                       is_split_into_words=True,
                       return_offsets_mapping=True,
                       return_tensors="pt",
                       padding="max_length",
                       truncation=True,
                       max_length=512)

    model.to(device)
    model.eval()

    # move input tensors to the same device as the model
    ids = inputs["input_ids"].to(device)
    mask = inputs["attention_mask"].to(device)

    # forward pass (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(ids, attention_mask=mask)
    logits = outputs[0]

    active_logits = logits.view(-1, model.num_labels)  # shape (batch_size * seq_len, num_labels)
    flattened_predictions = torch.argmax(active_logits, axis=1)  # shape (batch_size * seq_len,) - token-level predictions

    tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
    token_predictions = [model.config.id2label[i] for i in flattened_predictions.cpu().numpy()]
    wp_preds = list(zip(tokens, token_predictions))  # list of (wordpiece, prediction) tuples

    # keep only the prediction on the first word piece of each word;
    # special tokens and continuation pieces fail the offset check below
    prediction = []
    for token_pred, mapping in zip(wp_preds, inputs["offset_mapping"].squeeze().tolist()):
        if mapping[0] == 0 and mapping[1] != 0:
            prediction.append(token_pred[1])

    return sentence.split(), prediction
```
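The offset check above works because special tokens map to `(0, 0)` and continuation word pieces start at a non-zero offset, so only the first piece of each word survives. With a fast tokenizer, the same alignment can be expressed through `word_ids()`; the sketch below is a hypothetical variant (`predict_word_ids` is not part of the original card):

```python
import torch

def predict_word_ids(model, tokenizer, sentence):
    """Hypothetical variant of predict() that aligns labels via word_ids()."""
    words = sentence.split()
    inputs = tokenizer(words,
                       is_split_into_words=True,
                       return_tensors="pt",
                       truncation=True,
                       max_length=512)
    inputs = inputs.to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits
    pred_ids = logits.argmax(dim=-1).squeeze().tolist()

    labels, seen = [], set()
    for idx, word_id in enumerate(inputs.word_ids(batch_index=0)):
        # word_id is None for special tokens; keep the first piece per word
        if word_id is not None and word_id not in seen:
            seen.add(word_id)
            labels.append(model.config.id2label[pred_ids[idx]])
    return words, labels
```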
Run the `predict` helper on a sample sentence:

```python
sentence = "BJ Habibie adalah Presiden Indonesia ke-3"

words, labels = predict(model, tokenizer, sentence)
```
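To inspect the output, pair each word with its predicted tag (label names come from the model's `id2label` config):

```python
for word, label in zip(words, labels):
    print(f"{word}\t{label}")
```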