File size: 743 Bytes
662ed4b
611a3ed
 
d0f55c6
7e32ac7
2f4d877
d0f55c6
 
39ff146
7e32ac7
 
 
 
 
 
d0f55c6
 
fae0e19
d0f55c6
 
 
 
 
fae0e19
2f4d877
 
662ed4b
 
2f4d877
 
d0f55c6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import io
import json

import httpx
from huggingface_hub import HfFileSystem, hf_hub_url
from huggingface_hub.utils import build_hf_headers


# Shared asynchronous HTTP client, created once at import time and configured
# to follow redirects; the loader functions in this module issue their GET
# requests through it.
client = httpx.AsyncClient(follow_redirects=True)
# Shared Hugging Face filesystem handle, used by glob() for pattern lookups.
fs = HfFileSystem()


def glob(path):
    """Return whatever the shared ``HfFileSystem`` yields for the glob pattern *path*.

    This is a thin pass-through to ``fs.glob``; the pattern is forwarded
    unchanged and the result is returned unmodified.
    """
    return fs.glob(path)


async def load_json_file(path):
    """Fetch the file at *path* over HTTP and return its parsed JSON content.

    Args:
        path: Slash-separated path that ``to_url`` can convert into a
            download URL.

    Returns:
        The value produced by decoding the response body as JSON.

    Raises:
        ValueError: If *path* cannot be split by ``to_url`` or the response
            body is not valid JSON (``json.JSONDecodeError`` is a subclass).
    """
    url = to_url(path)
    # Send the same Hugging Face headers as load_jsonlines_file so that both
    # loaders authenticate identically (e.g. for gated or private repos).
    r = await client.get(url, headers=build_hf_headers())
    return r.json()


async def load_jsonlines_file(path):
    """Fetch the JSON Lines file at *path* and return its records as a list.

    Args:
        path: Slash-separated path that ``to_url`` can convert into a
            download URL.

    Returns:
        A list with one decoded JSON value per non-blank line of the
        response body, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    url = to_url(path)
    r = await client.get(url, headers=build_hf_headers())
    # Iterate through StringIO rather than str.splitlines(): splitlines() also
    # breaks on separators such as U+2028, which may legally appear unescaped
    # inside JSON strings.
    f = io.StringIO(r.text)
    # Skip whitespace-only lines (e.g. a doubled trailing newline); passing
    # them to json.loads would raise instead of being ignored.
    return [json.loads(line) for line in f if line.strip()]


def to_url(path):
    """Convert a slash-separated repository path into a dataset download URL.

    *path* is split into exactly four parts on its first three slashes. The
    first part is discarded (presumably a ``datasets`` prefix; confirm against
    callers), the second and third form the ``org/name`` repo id, and the
    remainder is the filename inside the repo.

    Raises:
        ValueError: If *path* contains fewer than three slashes.
    """
    _prefix, owner, dataset, relative_file = path.split("/", 3)
    repo_id = "/".join((owner, dataset))
    return hf_hub_url(repo_id=repo_id, filename=relative_file, repo_type="dataset")