Commit 87b96bd

Fix attach urls
1 parent d11117f commit 87b96bd

5 files changed, +186 -167 lines changed

superannotate/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -50,7 +50,8 @@ def consensus(*args, **kwargs):
     download_image_preannotations, get_image_annotations, get_image_bytes,
     get_image_metadata, get_image_preannotations, search_images,
     search_images_all_folders, set_image_annotation_status,
-    set_images_annotation_statuses, upload_image_annotations, get_project_root_folder_id
+    set_images_annotation_statuses, upload_image_annotations,
+    get_project_root_folder_id
 )
 from .db.project_api import (
     create_folder, delete_folders, get_folder_metadata,
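
Splitting the long import line also keeps get_project_root_folder_id exported at the package root. A minimal sketch of what that enables, assuming the SDK is already initialized with a token; the project name is a placeholder:

    import superannotate as sa

    # get_project_root_folder_id is imported in superannotate/__init__.py,
    # so it is callable straight off the package. "Example Project" is
    # hypothetical; the function takes project metadata, as in the diff below.
    project = sa.get_project_metadata("Example Project")
    root_folder_id = sa.get_project_root_folder_id(project)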

superannotate/common.py

Lines changed: 5 additions & 5 deletions
@@ -52,11 +52,11 @@
 }

 _MODEL_TRAINING_TASKS = {
-    "Instance Segmentation for Pixel Projects" : "instance_segmentation_pixel",
-    "Instance Segmentation for Vector Projects" : "instance_segmentation_vector",
-    "Keypoint Detection for Vector Projects" : "keypoint_detection_vector",
-    "Object Detection for Vector Projects" : "object_detection_vector",
-    "Semantic Segmentation for Pixel Projects" : "semantic_segmentation_pixel"
+    "Instance Segmentation for Pixel Projects": "instance_segmentation_pixel",
+    "Instance Segmentation for Vector Projects": "instance_segmentation_vector",
+    "Keypoint Detection for Vector Projects": "keypoint_detection_vector",
+    "Object Detection for Vector Projects": "object_detection_vector",
+    "Semantic Segmentation for Pixel Projects": "semantic_segmentation_pixel"
 }
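
For context, _MODEL_TRAINING_TASKS maps the human-readable training task names to the identifiers the backend expects; the hunk above only normalizes spacing around the colons. A minimal illustration of the lookup (the dict is private to superannotate/common.py, not public API):

    # Illustration only; _MODEL_TRAINING_TASKS is the dict defined above.
    task_id = _MODEL_TRAINING_TASKS["Keypoint Detection for Vector Projects"]
    assert task_id == "keypoint_detection_vector"
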
superannotate/db/projects.py

Lines changed: 14 additions & 148 deletions
@@ -38,8 +38,11 @@
     get_project_metadata_with_users
 )
 from .users import get_team_contributor_metadata
-from .utils import _get_upload_auth_token, _get_boto_session_by_credentials, _upload_images, create_empty_annotation, upload_image_array_to_s3, get_image_array_to_upload
+from .utils import _get_upload_auth_token, _get_boto_session_by_credentials, _upload_images, _attach_urls
+from tqdm import tqdm

+_NUM_THREADS = 10
+_TIME_TO_UPDATE_IN_TQDM = 1
 logger = logging.getLogger("superannotate-python-sdk")

 _api = API.get_instance()
@@ -690,7 +693,6 @@ def attach_image_urls_to_project(
     :return: list of linked image names, list of failed image names, list of duplicate image names
     :rtype: tuple
     """
-
     project, folder = get_project_and_folder_metadata(project)
     folder_name = project["name"] + (f'/{folder["name"]}' if folder else "")
     upload_state = common.upload_state_int_to_str(project.get("upload_state"))
@@ -703,166 +705,30 @@
     team_id, project_id = project["team_id"], project["id"]
     image_data = pd.read_csv(attachments, dtype=str)
     image_data = image_data[~image_data["url"].isnull()]
-    existing_names = image_data[~image_data["name"].isnull()]
-    duplicate_idx_csv = existing_names.duplicated(subset="name", keep="first")
-    duplicate_images = existing_names[duplicate_idx_csv]["name"].tolist()
-    existing_names = existing_names[~duplicate_idx_csv]
-    existing_images = search_images((project, folder))
-    duplicate_idx = []
     for ind, _ in image_data[image_data["name"].isnull()].iterrows():
-        while True:
-            name_try = str(uuid.uuid4())
-            if name_try not in existing_images:
-                image_data.at[ind, "name"] = name_try
-                existing_images.append(name_try)
-                break
-    image_data.drop_duplicates(subset="name", keep="first", inplace=True)
-    for ind, row in existing_names.iterrows():
-        if row["name"] in existing_images:
-            duplicate_idx.append(ind)
-    duplicate_images.extend(image_data.loc[duplicate_idx]["name"].tolist())
-    image_data.drop(labels=duplicate_idx, inplace=True)
-    if len(duplicate_images) != 0:
-        logger.warning(
-            "%s already existing images found that won't be uploaded.",
-            len(duplicate_images)
-        )
+        name_try = str(uuid.uuid4())
+        image_data.at[ind, "name"] = name_try
     image_data = pd.DataFrame(image_data, columns=["name", "url"])
     img_names_urls = image_data.values.tolist()
-    logger.info(
-        "Uploading %s images to project %s.", len(img_names_urls), folder_name
-    )
-    if len(img_names_urls) == 0:
-        return ([], [], duplicate_images)

     if folder:
         folder_id = folder["id"]
     else:
         folder_id = get_project_root_folder_id(project)

-    params = {'team_id': team_id, 'folder_id': folder_id}
-    uploaded = [[] for _ in range(_NUM_THREADS)]
-    tried_upload = [[] for _ in range(_NUM_THREADS)]
-    couldnt_upload = [[] for _ in range(_NUM_THREADS)]
-    finish_event = threading.Event()
-
-    res = _get_upload_auth_token(params=params, project_id=project_id)
-
-    prefix = res['filePath']
-    limit = res['availableImageCount']
-    images_to_upload = img_names_urls[:limit]
-    images_to_skip = img_names_urls[limit:]
-    chunksize = int(math.ceil(len(images_to_upload) / _NUM_THREADS))
-
-    tqdm_thread = threading.Thread(
-        target=__tqdm_thread_image_upload,
-        args=(len(images_to_upload), tried_upload, finish_event),
-        daemon=True
+    list_of_uploaded, list_of_not_uploaded, duplicate_images = _attach_urls(
+        img_names_urls=img_names_urls,
+        team_id=team_id,
+        folder_id=folder_id,
+        project_id=project_id,
+        annotation_status=annotation_status,
+        project=project,
+        folder_name=folder_name
     )
-    tqdm_thread.start()
-    threads = []
-    for thread_id in range(_NUM_THREADS):
-        t = threading.Thread(
-            target=__attach_image_urls_to_project_thread,
-            args=(
-                res, images_to_upload, project, annotation_status, prefix,
-                thread_id, chunksize, couldnt_upload, uploaded, tried_upload,
-                folder_id
-            ),
-            daemon=True
-        )
-        threads.append(t)
-        t.start()
-    for t in threads:
-        t.join()
-    finish_event.set()
-    tqdm_thread.join()
-    list_of_not_uploaded = []
-    for couldnt_upload_thread in couldnt_upload:
-        for f in couldnt_upload_thread:
-            list_of_not_uploaded.append(str(f))
-    list_of_uploaded = []
-    for upload_thread in uploaded:
-        for f in upload_thread:
-            list_of_uploaded.append(str(f))

-    list_of_not_uploaded += [i[0] for i in images_to_skip]
     return (list_of_uploaded, list_of_not_uploaded, duplicate_images)


-def __attach_image_urls_to_project_thread(
-    res, img_names_urls, project, annotation_status, prefix, thread_id,
-    chunksize, couldnt_upload, uploaded, tried_upload, project_folder_id
-):
-    len_img_paths = len(img_names_urls)
-    start_index = thread_id * chunksize
-    end_index = start_index + chunksize
-    if start_index >= len_img_paths:
-        return
-    s3_session = _get_boto_session_by_credentials(res)
-    s3_resource = s3_session.resource('s3')
-    bucket = s3_resource.Bucket(res["bucket"])
-    prefix = res['filePath']
-    uploaded_imgs = []
-    uploaded_imgs_info = ([], [], [])
-    for i in range(start_index, end_index):
-        if i >= len_img_paths:
-            break
-        name, _ = img_names_urls[i]
-        tried_upload[thread_id].append(name)
-        img_name_hash = str(uuid.uuid4()) + Path(name).suffix
-        key = prefix + img_name_hash
-        try:
-            bucket.put_object(
-                Body=json.dumps(create_empty_annotation((None, None), name)),
-                Key=key + ".json"
-            )
-        except Exception as e:
-            logger.warning("Unable to upload image %s. %s", name, e)
-            couldnt_upload[thread_id].append(name)
-            continue
-        else:
-            uploaded_imgs.append(name)
-            uploaded_imgs_info[0].append(img_names_urls[i])
-            uploaded_imgs_info[1].append(key)
-            uploaded_imgs_info[2].append((None, None))
-        if len(uploaded_imgs) >= 100:
-            try:
-                __create_image(
-                    uploaded_imgs_info[0],
-                    uploaded_imgs_info[1],
-                    project,
-                    annotation_status,
-                    prefix,
-                    uploaded_imgs_info[2],
-                    project_folder_id,
-                    upload_state="External"
-                )
-            except SABaseException as e:
-                couldnt_upload[thread_id] += uploaded_imgs
-                logger.warning(e)
-            else:
-                uploaded[thread_id] += uploaded_imgs
-            uploaded_imgs = []
-            uploaded_imgs_info = ([], [], [])
-    try:
-        __create_image(
-            uploaded_imgs_info[0],
-            uploaded_imgs_info[1],
-            project,
-            annotation_status,
-            prefix,
-            uploaded_imgs_info[2],
-            project_folder_id,
-            upload_state="External"
-        )
-    except SABaseException as e:
-        couldnt_upload[thread_id] += uploaded_imgs
-        logger.warning(e)
-    else:
-        uploaded[thread_id] += uploaded_imgs
-
-
 def upload_images_from_public_urls_to_project(
     project,
     img_urls,
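The rewrite above delegates the threaded S3 upload to the new _attach_urls helper and keeps only the CSV parsing in attach_image_urls_to_project. A minimal sketch of calling it; the file and project names are placeholders, and the status value is assumed to be a valid annotation status. Per the parsing above, the CSV needs a url column and an optional name column: rows with an empty url are dropped, and rows missing a name get a generated uuid4 name.

    from superannotate import attach_image_urls_to_project

    # attachments.csv (hypothetical contents):
    #   name,url
    #   img_1.jpg,https://example.com/img_1.jpg
    #   ,https://example.com/img_2.jpg      <- gets a uuid4 name
    uploaded, failed, duplicates = attach_image_urls_to_project(
        project="Example Project",      # "project" or "project/folder" path
        attachments="attachments.csv",
        annotation_status="NotStarted"  # assumed status name
    )
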
superannotate/db/search_projects.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ def search_projects(
     params = {
         'team_id': str(_api.team_id),
         'offset': 0,
-        'limit' : limit,
+        'limit': limit,
         'completeImagesCount': include_complete_image_count
     }
     if name is not None:

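A minimal usage sketch, assuming the public signature matches the parameters visible in this hunk (name and include_complete_image_count); the name prefix below is a placeholder:

    from superannotate import search_projects

    # Assumption based on the params above: filters projects by name and,
    # when include_complete_image_count=True, returns completed-image counts.
    projects = search_projects(
        name="Example", include_complete_image_count=True
    )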