Get dataset information
The dataset viewer provides an /info
endpoint for exploring general information about a dataset, including fields such as description, citation, homepage, license, and features.
The /info
endpoint accepts two query parameters:
dataset: the dataset name
config: the subset name
Python
JavaScript
cURL
import requests

# NOTE: API_TOKEN is not defined in this snippet; set it to your access token before running.
headers = {"Authorization": f"Bearer {API_TOKEN}"}
API_URL = "https://datasets-server.huggingface.co/info?dataset=ibm/duorc&config=SelfRC"


def query():
    """Send a GET request to API_URL and return the response body decoded from JSON."""
    response = requests.get(API_URL, headers=headers)
    return response.json()


data = query()
The endpoint response is a JSON object with the dataset_info
key. Its structure and content correspond to the DatasetInfo object of the datasets
library.
{
  "dataset_info": {
    "description": "",
    "citation": "",
    "homepage": "",
    "license": "",
    "features": {
      "plot_id": { "dtype": "string", "_type": "Value" },
      "plot": { "dtype": "string", "_type": "Value" },
      "title": { "dtype": "string", "_type": "Value" },
      "question_id": { "dtype": "string", "_type": "Value" },
      "question": { "dtype": "string", "_type": "Value" },
      "answers": {
        "feature": { "dtype": "string", "_type": "Value" },
        "_type": "Sequence"
      },
      "no_answer": { "dtype": "bool", "_type": "Value" }
    },
    "builder_name": "parquet",
    "dataset_name": "duorc",
    "config_name": "SelfRC",
    "version": { "version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0 },
    "splits": {
      "train": {
        "name": "train",
        "num_bytes": 248966361,
        "num_examples": 60721,
        "dataset_name": null
      },
      "validation": {
        "name": "validation",
        "num_bytes": 56359392,
        "num_examples": 12961,
        "dataset_name": null
      },
      "test": {
        "name": "test",
        "num_bytes": 51022318,
        "num_examples": 12559,
        "dataset_name": null
      }
    },
    "download_size": 21001846,
    "dataset_size": 356348071
  },
  "partial": false
}