Skip to content

Commit 2b0a820

Browse files
committed
width height check
1 parent feab539 commit 2b0a820

11 files changed

+9680
-57
lines changed

superannotate/db/annotation_classes.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,14 @@ def fill_class_and_attribute_ids(annotation_json, annotation_classes_dict):
348348
del attribute["groupId"]
349349

350350

351+
def check_annotation_json(annotation_json):
    """Return whether an annotation JSON carries image size metadata.

    The annotation is considered valid only when it is a dict with a
    "metadata" dict that contains both the "width" and "height" keys.

    :param annotation_json: parsed annotation JSON content
    :return: True if "metadata" holds both "width" and "height", else False
    :rtype: bool
    """
    if not isinstance(annotation_json, dict):
        return False
    metadata = annotation_json.get("metadata")
    # A null or non-dict "metadata" must be reported as invalid instead of
    # raising TypeError on the membership test (e.g. `"width" in None`).
    if not isinstance(metadata, dict):
        return False
    return "width" in metadata and "height" in metadata
357+
358+
351359
def get_annotation_classes_id_to_name(annotation_classes):
352360
annotation_classes_dict = {}
353361
for annotation_class in annotation_classes:

superannotate/db/projects.py

Lines changed: 115 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@
2929
SANonExistingProjectNameException
3030
)
3131
from .annotation_classes import (
32-
create_annotation_classes_from_classes_json, fill_class_and_attribute_ids,
33-
get_annotation_classes_name_to_id, search_annotation_classes
32+
check_annotation_json, create_annotation_classes_from_classes_json,
33+
fill_class_and_attribute_ids, get_annotation_classes_name_to_id,
34+
search_annotation_classes
3435
)
3536
from .images import search_images
3637
from .project_api import get_project_metadata_bare
@@ -784,7 +785,7 @@ def upload_images_to_project(
784785
response.status_code, "Couldn't get upload token " + response.text
785786
)
786787
tqdm_thread = threading.Thread(
787-
target=__tqdm_thread_image_upload,
788+
target=__tqdm_thread_upload,
788789
args=(len_img_paths, uploaded, couldnt_upload, finish_event),
789790
daemon=True
790791
)
@@ -1077,9 +1078,10 @@ def upload_images_from_azure_blob_to_project(
10771078

10781079
def __upload_annotations_thread(
10791080
team_id, project_id, project_type, anns_filenames, folder_path,
1080-
annotation_classes_dict, thread_id, chunksize, num_uploaded, from_s3_bucket,
1081-
actually_uploaded
1081+
annotation_classes_dict, thread_id, chunksize, missing_images,
1082+
couldnt_upload, uploaded, from_s3_bucket
10821083
):
1084+
print("DEBUG")
10831085
NUM_TO_SEND = 500
10841086
len_anns = len(anns_filenames)
10851087
start_index = thread_id * chunksize
@@ -1096,6 +1098,7 @@ def __upload_annotations_thread(
10961098

10971099
for i in range(start_index, end_index, NUM_TO_SEND):
10981100
data = {"project_id": project_id, "team_id": team_id, "names": []}
1101+
print("DEBUG", flush=True)
10991102
for j in range(i, i + NUM_TO_SEND):
11001103
if j >= end_index:
11011104
break
@@ -1107,8 +1110,14 @@ def __upload_annotations_thread(
11071110
json_req=data
11081111
)
11091112
res = response.json()
1110-
if len(res["images"]) != len(data["names"]):
1111-
logger.warning("Couldn't find all the images for annotation JSONs.")
1113+
if len(res["images"]) < len(data["names"]):
1114+
for name in data["names"]:
1115+
if name not in res["images"]:
1116+
ann_path = Path(folder_path) / (name + postfix_json)
1117+
missing_images[thread_id].append(ann_path)
1118+
logger.warning(
1119+
"Couldn't find image %s for annotation upload", ann_path
1120+
)
11121121
aws_creds = res["creds"]
11131122
s3_session = boto3.Session(
11141123
aws_access_key_id=aws_creds['accessKeyId'],
@@ -1121,18 +1130,22 @@ def __upload_annotations_thread(
11211130
for image_name, image_path in res['images'].items():
11221131
json_filename = image_name + postfix_json
11231132
if from_s3_bucket is None:
1124-
annotation_json = json.load(
1125-
open(Path(folder_path) / json_filename)
1126-
)
1133+
full_path = Path(folder_path) / json_filename
1134+
annotation_json = json.load(open(full_path))
11271135
else:
11281136
file = io.BytesIO()
1129-
from_s3_object = from_s3.Object(
1130-
from_s3_bucket, folder_path + json_filename
1131-
)
1137+
full_path = folder_path + json_filename
1138+
from_s3_object = from_s3.Object(from_s3_bucket, full_path)
11321139
from_s3_object.download_fileobj(file)
11331140
file.seek(0)
11341141
annotation_json = json.load(file)
11351142

1143+
if not check_annotation_json(annotation_json):
1144+
couldnt_upload[thread_id].append(full_path)
1145+
logger.warning(
1146+
"Annotation JSON %s missing width or height info", full_path
1147+
)
1148+
continue
11361149
fill_class_and_attribute_ids(
11371150
annotation_json, annotation_classes_dict
11381151
)
@@ -1152,10 +1165,7 @@ def __upload_annotations_thread(
11521165
from_s3_object.download_fileobj(file)
11531166
file.seek(0)
11541167
bucket.put_object(Key=image_path + postfix_mask, Body=file)
1155-
num_uploaded[thread_id] += 1
1156-
actually_uploaded[thread_id].append(
1157-
Path(folder_path) / json_filename
1158-
)
1168+
uploaded[thread_id].append(full_path)
11591169

11601170

11611171
def upload_annotations_from_folder_to_project(
@@ -1268,14 +1278,22 @@ def _upload_annotations_from_folder_to_project(
12681278
)
12691279
if len_annotations_paths == 0:
12701280
return return_result
1271-
num_uploaded = [0] * _NUM_THREADS
1272-
actually_uploaded = []
1281+
uploaded = []
12731282
for _ in range(_NUM_THREADS):
1274-
actually_uploaded.append([])
1283+
uploaded.append([])
1284+
couldnt_upload = []
1285+
for _ in range(_NUM_THREADS):
1286+
couldnt_upload.append([])
1287+
missing_image = []
1288+
for _ in range(_NUM_THREADS):
1289+
missing_image.append([])
12751290
finish_event = threading.Event()
12761291
tqdm_thread = threading.Thread(
1277-
target=__tqdm_thread,
1278-
args=(len_annotations_paths, num_uploaded, finish_event),
1292+
target=__tqdm_thread_upload_annotations,
1293+
args=(
1294+
len_annotations_paths, uploaded, couldnt_upload, missing_image,
1295+
finish_event
1296+
),
12791297
daemon=True
12801298
)
12811299
tqdm_thread.start()
@@ -1292,7 +1310,7 @@ def _upload_annotations_from_folder_to_project(
12921310
args=(
12931311
team_id, project_id, project_type, annotations_filenames,
12941312
folder_path, annotation_classes_dict, thread_id, chunksize,
1295-
num_uploaded, from_s3_bucket, actually_uploaded
1313+
missing_image, couldnt_upload, uploaded, from_s3_bucket
12961314
),
12971315
daemon=True
12981316
)
@@ -1302,23 +1320,27 @@ def _upload_annotations_from_folder_to_project(
13021320
t.join()
13031321
finish_event.set()
13041322
tqdm_thread.join()
1305-
logger.info("Number of annotations uploaded %s.", sum(num_uploaded))
1306-
if sum(num_uploaded) != len_annotations_paths:
1307-
logger.warning(
1308-
"%s annotations were not uploaded.",
1309-
len_annotations_paths - sum(num_uploaded)
1310-
)
13111323

1312-
for ac_upl in actually_uploaded:
1313-
return_result += [str(p) for p in ac_upl]
1324+
list_of_not_uploaded = []
1325+
for couldnt_upload_thread in couldnt_upload:
1326+
for file in couldnt_upload_thread:
1327+
list_of_not_uploaded.append(str(file))
1328+
list_of_uploaded = []
1329+
for upload_thread in uploaded:
1330+
for file in upload_thread:
1331+
list_of_uploaded.append(str(file))
1332+
list_of_missing_images = []
1333+
for missing_thread in missing_image:
1334+
for file in missing_thread:
1335+
list_of_missing_images.append(str(file))
13141336
# print(return_result)
1315-
return return_result
1337+
return (list_of_uploaded, list_of_not_uploaded, list_of_missing_images)
13161338

13171339

13181340
def __upload_preannotations_thread(
13191341
aws_creds, project_type, preannotations_filenames, folder_path,
1320-
annotation_classes_dict, thread_id, chunksize, num_uploaded,
1321-
already_uploaded, from_s3_bucket
1342+
annotation_classes_dict, thread_id, chunksize, couldnt_upload, uploaded,
1343+
from_s3_bucket
13221344
):
13231345
len_preanns = len(preannotations_filenames)
13241346
start_index = thread_id * chunksize
@@ -1341,20 +1363,24 @@ def __upload_preannotations_thread(
13411363
from_s3 = from_session.resource('s3')
13421364

13431365
for i in range(start_index, end_index):
1344-
if already_uploaded[i]:
1345-
continue
13461366
json_filename = preannotations_filenames[i]
13471367
if from_s3_bucket is None:
1348-
annotation_json = json.load(open(Path(folder_path) / json_filename))
1368+
full_path = Path(folder_path) / json_filename
1369+
annotation_json = json.load(open(full_path))
13491370
else:
13501371
file = io.BytesIO()
1351-
from_s3_object = from_s3.Object(
1352-
from_s3_bucket, folder_path + json_filename
1353-
)
1372+
full_path = folder_path + json_filename
1373+
from_s3_object = from_s3.Object(from_s3_bucket, full_path)
13541374
from_s3_object.download_fileobj(file)
13551375
file.seek(0)
13561376
annotation_json = json.load(file)
13571377

1378+
if not check_annotation_json(annotation_json):
1379+
couldnt_upload[thread_id].append(full_path)
1380+
logger.warning(
1381+
"Annotation JSON %s missing width or height info", full_path
1382+
)
1383+
continue
13581384
fill_class_and_attribute_ids(annotation_json, annotation_classes_dict)
13591385
bucket.put_object(
13601386
Key=aws_creds["filePath"] + f"/{json_filename}",
@@ -1375,22 +1401,45 @@ def __upload_preannotations_thread(
13751401
bucket.put_object(
13761402
Key=aws_creds['filePath'] + f'/{mask_filename}', Body=file
13771403
)
1378-
num_uploaded[thread_id] += 1
1379-
already_uploaded[i] = True
1404+
uploaded[thread_id].append(full_path)
13801405

13811406

1382-
def __tqdm_thread(total_num, current_nums, finish_event):
1407+
def __tqdm_thread_upload(total_num, uploaded, couldnt_upload, finish_event):
    """Drive a tqdm progress bar until ``finish_event`` is set.

    Every 5 seconds the bar is moved to the number of items recorded so far
    in ``uploaded`` and ``couldnt_upload``; once the event is set the bar is
    advanced to ``total_num`` and the function returns.

    :param total_num: total of the progress bar
    :param uploaded: iterable of sized containers (len() of each is summed)
    :param couldnt_upload: iterable of sized containers (len() of each is
        summed)
    :param finish_event: event whose ``wait(5)`` result ends the loop
    """
    with tqdm(total=total_num) as pbar:
        # wait() yields a falsy value on timeout, i.e. work is still ongoing.
        while not finish_event.wait(5):
            processed = sum(len(chunk) for chunk in couldnt_upload)
            processed += sum(len(chunk) for chunk in uploaded)
            pbar.update(processed - pbar.n)
        # Event was set: fill the bar up to the total.
        pbar.update(total_num - pbar.n)
13911421

13921422

1393-
def __tqdm_thread_image_upload(
1423+
def __tqdm_thread_upload_annotations(
    total_num, uploaded, couldnt_upload, missing_image, finish_event
):
    """Drive a tqdm progress bar for annotation upload until finished.

    Every 5 seconds the bar is moved to the number of items recorded so far
    in ``uploaded``, ``couldnt_upload`` and ``missing_image``; once
    ``finish_event`` is set the bar is advanced to ``total_num`` and the
    function returns.

    :param total_num: total of the progress bar
    :param uploaded: iterable of sized containers (len() of each is summed)
    :param couldnt_upload: iterable of sized containers (len() of each is
        summed)
    :param missing_image: iterable of sized containers (len() of each is
        summed)
    :param finish_event: event whose ``wait(5)`` result ends the loop
    """
    with tqdm(total=total_num) as pbar:
        # wait() yields a falsy value on timeout, i.e. work is still ongoing.
        while not finish_event.wait(5):
            processed = sum(len(chunk) for chunk in couldnt_upload)
            processed += sum(len(chunk) for chunk in uploaded)
            processed += sum(len(chunk) for chunk in missing_image)
            pbar.update(processed - pbar.n)
        # Event was set: fill the bar up to the total.
        pbar.update(total_num - pbar.n)
1441+
1442+
def __tqdm_thread_upload_preannotations(
13941443
total_num, uploaded, couldnt_upload, finish_event
13951444
):
13961445
with tqdm(total=total_num) as pbar:
@@ -1523,13 +1572,19 @@ def _upload_preannotations_from_folder_to_project(
15231572
'creds_only': True,
15241573
'type': common.project_type_str_to_int(project_type)
15251574
}
1526-
num_uploaded = [0] * _NUM_THREADS
1527-
already_uploaded = [False] * len_preannotations_paths
1575+
uploaded = []
1576+
for _ in range(_NUM_THREADS):
1577+
uploaded.append([])
1578+
couldnt_upload = []
1579+
for _ in range(_NUM_THREADS):
1580+
couldnt_upload.append([])
1581+
finish_event = threading.Event()
15281582
chunksize = int(math.ceil(len_preannotations_paths / _NUM_THREADS))
15291583
finish_event = threading.Event()
15301584
tqdm_thread = threading.Thread(
1531-
target=__tqdm_thread,
1532-
args=(len_preannotations_paths, num_uploaded, finish_event),
1585+
target=__tqdm_thread_upload_preannotations,
1586+
1587+
args=(len_preannotations_paths, couldnt_upload, uploaded, finish_event),
15331588
daemon=True
15341589
)
15351590
tqdm_thread.start()
@@ -1552,8 +1607,8 @@ def _upload_preannotations_from_folder_to_project(
15521607
target=__upload_preannotations_thread,
15531608
args=(
15541609
aws_creds, project_type, preannotations_filenames, folder_path,
1555-
annotation_classes_dict, thread_id, chunksize, num_uploaded,
1556-
already_uploaded, from_s3_bucket
1610+
annotation_classes_dict, thread_id, chunksize, couldnt_upload,
1611+
uploaded, from_s3_bucket
15571612
),
15581613
daemon=True
15591614
)
@@ -1563,12 +1618,15 @@ def _upload_preannotations_from_folder_to_project(
15631618
t.join()
15641619
finish_event.set()
15651620
tqdm_thread.join()
1566-
logger.info("Number of preannotations uploaded %s.", sum(num_uploaded))
1567-
if sum(num_uploaded) != len_preannotations_paths:
1568-
logger.warning(
1569-
"%s preannotations were not uploaded.",
1570-
len_preannotations_paths - sum(num_uploaded)
1571-
)
1621+
list_of_not_uploaded = []
1622+
for couldnt_upload_thread in couldnt_upload:
1623+
for file in couldnt_upload_thread:
1624+
list_of_not_uploaded.append(str(file))
1625+
list_of_uploaded = []
1626+
for upload_thread in uploaded:
1627+
for file in upload_thread:
1628+
list_of_uploaded.append(str(file))
1629+
return (list_of_uploaded, list_of_not_uploaded)
15721630
return return_result + [str(p) for p in preannotations_paths]
15731631

15741632

0 commit comments

Comments
 (0)