lep1's picture
Upload 14 files
904d7fb verified
raw
history blame
2.89 kB
import json
import numpy as np
import torch
def convert_to_braille_unicode(str_input: str, path: str = "./src/utils/number_map.json") -> str:
    """Return the value that the JSON mapping file at *path* stores for *str_input*.

    The mapping file is re-read on every call.

    Args:
        str_input: Key to look up in the mapping.
        path: Path of the JSON file holding the key -> value mapping.

    Returns:
        The value stored under ``str_input`` in the mapping.

    Raises:
        KeyError: If ``str_input`` is not a key of the mapping. (Previously
            this case surfaced as an ``UnboundLocalError`` because the result
            variable was never assigned.)
        OSError: If the mapping file cannot be opened.
        json.JSONDecodeError: If the mapping file is not valid JSON.
    """
    # Be explicit about the encoding: the platform default is not UTF-8
    # everywhere, and the mapping may contain non-ASCII characters.
    with open(path, "r", encoding="utf-8") as fl:
        data = json.load(fl)

    try:
        return data[str_input]
    except KeyError:
        raise KeyError(
            f"{str_input!r} has no entry in mapping file {path!r}"
        ) from None
def parse_xywh_and_class(boxes: torch.Tensor) -> list:
    """Group detection boxes into text lines and order each line left to right.

    Args:
        boxes: Detection result exposing ``shape``, ``xywh``, ``conf`` and
            ``cls``, where the last three are tensors supporting
            ``.cpu().numpy()``.
            NOTE(review): despite the annotation this looks like a detector
            "Boxes" result object rather than a plain tensor -- confirm
            against the caller.

    Returns:
        A list of NumPy arrays, one per detected line, ordered by increasing
        y. In each array the columns are ``x, y, w, h, confidence, class`` and
        the rows are sorted by increasing x.
    """
    # Copy everything into one plain NumPy array. It is allocated with
    # boxes.shape, so any column past the sixth is left at zero.
    detections = np.zeros(boxes.shape)
    detections[:, 5] = boxes.cls.cpu().numpy()    # column 5: class
    detections[:, 4] = boxes.conf.cpu().numpy()   # column 4: confidence
    detections[:, :4] = boxes.xywh.cpu().numpy()  # columns 0-3: x, y, w, h

    # Order the rows from top to bottom (by y).
    top_to_bottom = detections[:, 1].argsort()
    detections = detections[top_to_bottom]

    # A new line starts wherever the y gap between consecutive boxes exceeds
    # half of the mean box height (floor division, so the limit is floored).
    max_gap = np.mean(detections[:, 3]) // 2
    y_gaps = np.diff(detections[:, 1])
    line_breaks = np.where(y_gaps > max_gap)[0]

    # Split after each break position, then order every line by x.
    lines = np.split(detections, line_breaks + 1)
    return [line[line[:, 0].argsort()] for line in lines]
def arrange_braille_to_2x3(box_classes: list) -> list:
    """Convert a flat list of detected Braille classes into 2x3 dot matrices.

    :param box_classes: detected Braille class values; the length must be a
        multiple of 6
    :return: list of NumPy arrays of shape (2, 3), one per group of six
        consecutive values
    :raises ValueError: if the input length is not a multiple of 6
    """
    # Reject inputs that cannot be cut into whole groups of six.
    if len(box_classes) % 6:
        raise ValueError("输入的盲文字符数组长度必须是6的倍数")

    # Each group of six is laid out as a 3x2 matrix and then transposed,
    # which yields the 2x3 arrangement.
    return [
        np.array(box_classes[start:start + 6]).reshape(3, 2).T
        for start in range(0, len(box_classes), 6)
    ]