# petrel-oss-python-sdk2 / tests / multi_cluster_test.py
# Uploaded by Weiyun1025
# Commit message: Upload folder using huggingface_hub
# Commit: 2abfccb (verified)
# -*- coding: utf-8 -*-
from petrel_client.client import Client
from multiprocessing import Process
import logging
import random
# Logger used by the worker function ``f`` to report failed reads and totals.
LOG = logging.getLogger('petrel_client.test')
def f(conf_path, repeat):
    """Fetch randomly chosen objects ``repeat`` times and log the byte total.

    A ``Client`` is created from ``conf_path``; on every iteration one URL is
    picked at random from a fixed list of sample locations and fetched with
    ``client.get``. A falsy result is logged as a warning; otherwise its
    length is added to a running total, which is logged at debug level once
    all iterations have finished.

    Args:
        conf_path: Path of the configuration file passed to ``Client``.
        repeat: Number of fetch attempts to perform.
    """
    client = Client(conf_path)

    # The candidate list never changes, so build it once rather than on
    # every iteration.
    urls = [
        'cluster1:s3://my-bucket/object.1',  # read from cluster1
        'cluster2:s3://my-bucket/object.2',  # read from cluster2
        # no cluster given: read from the default_cluster named in the
        # configuration file
        's3://my-bucket/object.3',
        'file://tmp/xxx',  # read from DFS
        '/tmp/xxx',  # neither 's3:' nor 'file:' prefix: read from DFS
    ]

    total_bytes = 0
    for _ in range(repeat):
        url = random.choice(urls)
        body = client.get(url)
        if not body:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            LOG.warning('can not get content from %s', url)
        else:
            total_bytes += len(body)

    LOG.debug('total_bytes: %s', total_bytes)
# Settings for the multi-process read test.
conf_path = '~/petreloss.conf'  # configuration file handed to each worker
repeat = 5000  # fetch attempts per worker process
parallelism = 4  # number of worker processes to launch

# Guard the launcher: with the ``spawn`` start method every child re-imports
# this module, and unguarded process creation would then be re-executed in
# each child instead of only in the parent.
if __name__ == '__main__':
    process_list = [Process(target=f, args=(conf_path, repeat))
                    for _ in range(parallelism)]

    # Start every worker before joining so that they run concurrently.
    for p in process_list:
        p.start()
    for p in process_list:
        p.join()