20 | 20 | import requests |
21 | 21 | from azure.storage.blob import BlobServiceClient |
22 | 22 | from google.cloud import storage |
23 | | -from PIL import Image, ImageOps |
24 | | -from tqdm import tqdm |
25 | 23 |
26 | 24 | from .. import common |
27 | 25 | from ..api import API |
28 | 26 | from ..exceptions import ( |
29 | | - SABaseException, SAExistingProjectNameException, SAImageSizeTooLarge, |
| 27 | + SABaseException, SAExistingProjectNameException, |
30 | 28 | SANonExistingProjectNameException |
31 | 29 | ) |
32 | 30 | from .annotation_classes import ( |
40 | 38 | get_project_metadata_with_users |
41 | 39 | ) |
42 | 40 | from .users import get_team_contributor_metadata |
43 | | -from .utils import _get_upload_auth_token, _get_boto_session_by_credentials |
| 41 | +from .utils import _get_upload_auth_token, _get_boto_session_by_credentials, _upload_images, create_empty_annotation, upload_image_array_to_s3, get_image_array_to_upload |
44 | 42 |
45 | 43 | logger = logging.getLogger("superannotate-python-sdk") |
46 | 44 |
47 | 45 | _api = API.get_instance() |
48 | | -_NUM_THREADS = 10 |
49 | | -_TIME_TO_UPDATE_IN_TQDM = 1 |
50 | 46 |
51 | 47 |
52 | 48 | def create_project(project_name, project_description, project_type): |
@@ -593,236 +589,6 @@ def upload_images_from_folder_to_project( |
593 | 589 | ) |
594 | 590 |
595 | 591 |
596 | | -def create_empty_annotation(size, image_name): |
597 | | - return { |
598 | | - "metadata": { |
599 | | - 'height': size[1], |
600 | | - 'width': size[0], |
601 | | - 'name': image_name |
602 | | - } |
603 | | - } |
604 | | - |
605 | | - |
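For reference, the skeleton the removed `create_empty_annotation` produced (and which `upload_image_array_to_s3` serialized next to each image). A minimal sketch; the 1920×1080 `cat.jpg` values are illustrative:

```python
import json

# What the removed create_empty_annotation((1920, 1080), "cat.jpg") would
# return: only the metadata block is filled in for a fresh, unannotated image.
skeleton = {"metadata": {"height": 1080, "width": 1920, "name": "cat.jpg"}}
print(json.dumps(skeleton))  # stored as <prefix><hash>.json next to the image
```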
606 | | -def upload_image_array_to_s3( |
607 | | - bucket, img_name, img_name_hash, size, orig_image, lores_image, huge_image, |
608 | | - thumbnail_image, prefix |
609 | | -): |
610 | | - key = prefix + img_name_hash |
611 | | - bucket.put_object(Body=orig_image, Key=key) |
612 | | - bucket.put_object(Body=lores_image, Key=key + '___lores.jpg') |
613 | | - bucket.put_object( |
614 | | - Body=huge_image, |
615 | | - Key=key + '___huge.jpg', |
616 | | - Metadata={ |
617 | | - 'height': str(size[1]), |
618 | | - 'width': str(size[0]) |
619 | | - } |
620 | | - ) |
621 | | - bucket.put_object(Body=thumbnail_image, Key=key + '___thumb.jpg') |
622 | | - bucket.put_object( |
623 | | - Body=json.dumps(create_empty_annotation(size, img_name)), |
624 | | - Key=key + ".json" |
625 | | - ) |
626 | | - return key |
627 | | - |
628 | | - |
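The removed `upload_image_array_to_s3` writes five objects per image under a single hashed key. A minimal sketch of that key layout, with the suffixes taken from the diff and the prefix/hash values invented for illustration:

```python
# Hedged sketch of the five objects written per image by the removed
# upload_image_array_to_s3 (now in .utils); suffixes mirror the diff above.
def object_keys(prefix, img_name_hash):
    key = prefix + img_name_hash
    return [
        key,                    # original bytes, verbatim
        key + "___lores.jpg",   # editor-quality JPEG
        key + "___huge.jpg",    # 600px-wide preview, width/height in metadata
        key + "___thumb.jpg",   # 128x96 letterboxed thumbnail
        key + ".json",          # empty annotation skeleton (see above)
    ]

print(object_keys("projects/1234/", "9f1c0a2e.jpg"))
```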
629 | | -def get_image_array_to_upload( |
630 | | - img_name, byte_io_orig, image_quality_in_editor, project_type |
631 | | -): |
632 | | - if image_quality_in_editor not in ["original", "compressed"]: |
633 | | - raise SABaseException(0, "NA ImageQuality in get_image_array_to_upload") |
634 | | - Image.MAX_IMAGE_PIXELS = None |
635 | | - im = Image.open(byte_io_orig) |
636 | | - im_format = im.format |
637 | | - |
638 | | - im = ImageOps.exif_transpose(im) |
639 | | - |
640 | | - width, height = im.size |
641 | | - |
642 | | - resolution = width * height |
643 | | - if resolution > common.MAX_IMAGE_RESOLUTION[project_type]: |
644 | | - raise SABaseException( |
645 | | - 0, "Image resolution " + str(resolution) + |
646 | | - " too large. Max supported for " + project_type + " projects is " + |
647 | | - str(common.MAX_IMAGE_RESOLUTION[project_type]) |
648 | | - ) |
649 | | - |
650 | | - if image_quality_in_editor == "original" and im_format in ['JPEG', 'JPG']: |
651 | | - byte_io_lores = io.BytesIO(byte_io_orig.getbuffer()) |
652 | | - else: |
653 | | - byte_io_lores = io.BytesIO() |
654 | | - bg = Image.new('RGBA', im.size, (255, 255, 255)) |
655 | | - im = im.convert("RGBA") |
656 | | - bg.paste(im, mask=im) |
657 | | - bg = bg.convert('RGB') |
658 | | - if image_quality_in_editor == "original": |
659 | | - bg.save(byte_io_lores, 'JPEG', quality=100, subsampling=0) |
660 | | - else: |
661 | | - bg.save(byte_io_lores, 'JPEG', quality=60) |
662 | | - im = bg |
663 | | - |
664 | | - byte_io_huge = io.BytesIO() |
665 | | - hsize = int(height * 600.0 / width) |
666 | | - im.resize((600, hsize), Image.ANTIALIAS).save(byte_io_huge, 'JPEG') |
667 | | - |
668 | | - byte_io_thumbs = io.BytesIO() |
669 | | - thumbnail_size = (128, 96) |
670 | | - background = Image.new('RGB', thumbnail_size, "black") |
671 | | - im.thumbnail(thumbnail_size, Image.ANTIALIAS) |
672 | | - (w, h) = im.size |
673 | | - background.paste( |
674 | | - im, ((thumbnail_size[0] - w) // 2, (thumbnail_size[1] - h) // 2) |
675 | | - ) |
676 | | - im = background |
677 | | - im.save(byte_io_thumbs, 'JPEG') |
678 | | - |
679 | | - byte_io_thumbs.seek(0) |
680 | | - byte_io_lores.seek(0) |
681 | | - byte_io_huge.seek(0) |
682 | | - byte_io_orig.seek(0) |
683 | | - |
684 | | - img_name_hash = str(uuid.uuid4()) + Path(img_name).suffix |
685 | | - return img_name, img_name_hash, ( |
686 | | - width, height |
687 | | - ), byte_io_orig, byte_io_lores, byte_io_huge, byte_io_thumbs |
688 | | - |
689 | | - |
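Most of the removed `get_image_array_to_upload` is straightforward resizing; the one non-obvious step is the thumbnail, which is letterboxed onto a black 128×96 canvas rather than stretched. A self-contained sketch of just that step (the function name and inputs are illustrative):

```python
import io
from PIL import Image

def letterbox_thumbnail(im, size=(128, 96)):
    # Shrink preserving aspect ratio, then centre on a black canvas so every
    # thumbnail comes out exactly 128x96 (the removed code's behaviour).
    canvas = Image.new("RGB", size, "black")
    im = im.copy()
    im.thumbnail(size)  # Image.ANTIALIAS was the resample the original passed
                        # (an alias of LANCZOS, removed in Pillow 10)
    w, h = im.size
    canvas.paste(im, ((size[0] - w) // 2, (size[1] - h) // 2))
    out = io.BytesIO()
    canvas.save(out, "JPEG")
    return out.getvalue()

jpeg_bytes = letterbox_thumbnail(Image.new("RGB", (1920, 1080), "white"))
```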
690 | | -def __upload_images_to_aws_thread( |
691 | | - res, img_paths, project, annotation_status, prefix, thread_id, chunksize, |
692 | | - couldnt_upload, uploaded, tried_upload, image_quality_in_editor, |
693 | | - from_s3_bucket, project_folder_id |
694 | | -): |
695 | | - len_img_paths = len(img_paths) |
696 | | - start_index = thread_id * chunksize |
697 | | - end_index = start_index + chunksize |
698 | | - if from_s3_bucket is not None: |
699 | | - from_session = boto3.Session() |
700 | | - from_s3 = from_session.resource('s3') |
701 | | - if start_index >= len_img_paths: |
702 | | - return |
703 | | - s3_session = _get_boto_session_by_credentials(res) |
704 | | - s3_resource = s3_session.resource('s3') |
705 | | - bucket = s3_resource.Bucket(res["bucket"]) |
706 | | - prefix = res['filePath'] |
707 | | - uploaded_imgs = [] |
708 | | - uploaded_imgs_info = ([], [], []) |
709 | | - for i in range(start_index, end_index): |
710 | | - if i >= len_img_paths: |
711 | | - break |
712 | | - path = img_paths[i] |
713 | | - tried_upload[thread_id].append(path) |
714 | | - try: |
715 | | - if from_s3_bucket is not None: |
716 | | - file = io.BytesIO() |
717 | | - from_s3_object = from_s3.Object(from_s3_bucket, path) |
718 | | - file_size = from_s3_object.content_length |
719 | | - if file_size > common.MAX_IMAGE_SIZE: |
720 | | - raise SAImageSizeTooLarge(file_size) |
721 | | - from_s3_object.download_fileobj(file) |
722 | | - else: |
723 | | - file_size = Path(path).stat().st_size |
724 | | - if file_size > common.MAX_IMAGE_SIZE: |
725 | | - raise SAImageSizeTooLarge(file_size) |
726 | | - with open(path, "rb") as f: |
727 | | - file = io.BytesIO(f.read()) |
728 | | - images_array = get_image_array_to_upload( |
729 | | - Path(path).name, file, image_quality_in_editor, project["type"] |
730 | | - ) |
731 | | - key = upload_image_array_to_s3(bucket, *images_array, prefix) |
732 | | - except Exception as e: |
733 | | - logger.warning("Unable to upload image %s. %s", path, e) |
734 | | - couldnt_upload[thread_id].append(path) |
735 | | - continue |
736 | | - else: |
737 | | - uploaded_imgs.append(path) |
738 | | - uploaded_imgs_info[0].append(Path(path).name) |
739 | | - uploaded_imgs_info[1].append(key) |
740 | | - uploaded_imgs_info[2].append(images_array[2]) |
741 | | - if len(uploaded_imgs) >= 100: |
742 | | - try: |
743 | | - __create_image( |
744 | | - uploaded_imgs_info[0], |
745 | | - uploaded_imgs_info[1], |
746 | | - project, |
747 | | - annotation_status, |
748 | | - prefix, |
749 | | - uploaded_imgs_info[2], |
750 | | - project_folder_id, |
751 | | - upload_state="Basic" |
752 | | - ) |
753 | | - except SABaseException as e: |
754 | | - couldnt_upload[thread_id] += uploaded_imgs |
755 | | - logger.warning(e) |
756 | | - else: |
757 | | - uploaded[thread_id] += uploaded_imgs |
758 | | - uploaded_imgs = [] |
759 | | - uploaded_imgs_info = ([], [], []) |
760 | | - try: |
761 | | - __create_image( |
762 | | - uploaded_imgs_info[0], |
763 | | - uploaded_imgs_info[1], |
764 | | - project, |
765 | | - annotation_status, |
766 | | - prefix, |
767 | | - uploaded_imgs_info[2], |
768 | | - project_folder_id, |
769 | | - upload_state="Basic" |
770 | | - ) |
771 | | - except SABaseException as e: |
772 | | - couldnt_upload[thread_id] += uploaded_imgs |
773 | | - logger.warning(e) |
774 | | - else: |
775 | | - uploaded[thread_id] += uploaded_imgs |
776 | | - |
777 | | - |
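The removed worker gives each thread the half-open slice `[thread_id * chunksize, thread_id * chunksize + chunksize)` of the shared path list and registers uploads with the backend in batches of 100. A stripped-down sketch of that chunk-and-batch pattern, with a stub `register` standing in for `__create_image`:

```python
def upload_chunk(paths, thread_id, chunksize, register, batch_size=100):
    # Each worker owns one slice of the shared path list and flushes
    # registrations in batches, as the removed thread body did.
    start = thread_id * chunksize
    batch = []
    for path in paths[start:start + chunksize]:
        batch.append(path)      # stand-in for the per-image S3 upload
        if len(batch) >= batch_size:
            register(batch)     # flush a full batch to the backend
            batch = []
    if batch:
        register(batch)         # final partial batch

upload_chunk(list(range(250)), 0, 125, register=lambda b: print(len(b)))
# prints 100, then 25
```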
778 | | -def __create_image( |
779 | | - img_names, |
780 | | - img_paths, |
781 | | - project, |
782 | | - annotation_status, |
783 | | - remote_dir, |
784 | | - sizes, |
785 | | - project_folder_id, |
786 | | - upload_state="Initial" |
787 | | -): |
788 | | - if len(img_paths) == 0: |
789 | | - return |
790 | | - team_id, project_id = project["team_id"], project["id"] |
791 | | - upload_state_code = common.upload_state_str_to_int(upload_state) |
792 | | - data = { |
793 | | - "project_id": str(project_id), |
794 | | - "team_id": str(team_id), |
795 | | - "images": [], |
796 | | - "annotation_status": annotation_status, |
797 | | - "meta": {}, |
798 | | - "upload_state": upload_state_code |
799 | | - } |
800 | | - if project_folder_id is not None: |
801 | | - data["folder_id"] = project_folder_id |
802 | | - for img_data, img_path, size in zip(img_names, img_paths, sizes): |
803 | | - img_name_uuid = Path(img_path).name |
804 | | - remote_path = remote_dir + f"{img_name_uuid}" |
805 | | - if upload_state == "External": |
806 | | - img_name, img_url = img_data |
807 | | - else: |
808 | | - img_name, img_url = img_data, remote_path |
809 | | - data["images"].append({"name": img_name, "path": img_url}) |
810 | | - data["meta"][img_name] = { |
811 | | - "width": size[0], |
812 | | - "height": size[1], |
813 | | - "annotation_json_path": remote_path + ".json", |
814 | | - "annotation_bluemap_path": remote_path + ".png" |
815 | | - } |
816 | | - |
817 | | - response = _api.send_request( |
818 | | - req_type='POST', path='/image/ext-create', json_req=data |
819 | | - ) |
820 | | - if not response.ok: |
821 | | - raise SABaseException( |
822 | | - response.status_code, "Couldn't ext-create image " + response.text |
823 | | - ) |
824 | | - |
825 | | - |
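For reference, the shape of the `POST /image/ext-create` payload assembled above. Every concrete value below is made up, and the integer codes are placeholders for the real annotation-status and upload-state constants:

```python
# Illustrative /image/ext-create payload in the shape assembled above.
payload = {
    "project_id": "1234",
    "team_id": "42",
    "annotation_status": 1,   # placeholder status code
    "meta": {
        "cat.jpg": {
            "width": 1920,
            "height": 1080,
            "annotation_json_path": "projects/1234/9f1c0a2e.jpg.json",
            "annotation_bluemap_path": "projects/1234/9f1c0a2e.jpg.png",
        }
    },
    "upload_state": 2,        # placeholder for upload_state_str_to_int("Basic")
    "folder_id": 99,          # only set when uploading into a folder
    "images": [{"name": "cat.jpg", "path": "projects/1234/9f1c0a2e.jpg"}],
}
```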
826 | 592 | def upload_images_to_project( |
827 | 593 | project, |
828 | 594 | img_paths, |
@@ -870,82 +636,24 @@ def upload_images_to_project( |
870 | 636 | project |
871 | 637 | ) |
872 | 638 | team_id, project_id = project["team_id"], project["id"] |
873 | | - existing_images = search_images((project, folder)) |
874 | | - duplicate_images = [] |
875 | | - for existing_image in existing_images: |
876 | | - i = -1 |
877 | | - for j, img_path in enumerate(img_paths): |
878 | | - if Path(img_path).name == existing_image: |
879 | | - i = j |
880 | | - break |
881 | | - if i != -1: |
882 | | - duplicate_images.append(str(img_paths[i])) |
883 | | - del img_paths[i] |
884 | | - if len(duplicate_images) != 0: |
885 | | - logger.warning( |
886 | | - "%s already existing images found that won't be uploaded.", |
887 | | - len(duplicate_images) |
888 | | - ) |
889 | | - len_img_paths = len(img_paths) |
890 | | - logger.info( |
891 | | - "Uploading %s images to project %s.", len_img_paths, folder_name |
892 | | - ) |
893 | | - if len_img_paths == 0: |
894 | | - return ([], [], duplicate_images) |
895 | 639 |
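The block removed above filtered out images whose basenames already exist in the target folder before uploading. A compact, set-based equivalent of that O(n·m) loop (it treats every path with a matching basename as a duplicate, which matches the removed code when basenames are unique):

```python
from pathlib import Path

def split_duplicates(img_paths, existing_names):
    # Anything whose basename is already in the project is reported as a
    # duplicate and skipped, mirroring the removed filter and its warning.
    existing = set(existing_names)
    keep, dupes = [], []
    for p in img_paths:
        (dupes if Path(p).name in existing else keep).append(str(p))
    return keep, dupes

print(split_duplicates(["a/cat.jpg", "b/dog.jpg"], ["cat.jpg"]))
# (['b/dog.jpg'], ['a/cat.jpg'])
```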
896 | 640 | if folder: |
897 | 641 | folder_id = folder["id"] |
898 | 642 | else: |
899 | 643 | folder_id = get_project_root_folder_id(project) |
900 | 644 |
901 | | - params = {'team_id': team_id, 'folder_id': folder_id} |
902 | | - uploaded = [[] for _ in range(_NUM_THREADS)] |
903 | | - tried_upload = [[] for _ in range(_NUM_THREADS)] |
904 | | - couldnt_upload = [[] for _ in range(_NUM_THREADS)] |
905 | | - finish_event = threading.Event() |
906 | | - |
907 | | - res = _get_upload_auth_token(params=params, project_id=project_id) |
908 | | - |
909 | | - prefix = res['filePath'] |
910 | | - limit = res['availableImageCount'] |
911 | | - images_to_upload = img_paths[:limit] |
912 | | - images_to_skip = [str(path) for path in img_paths[limit:]] |
913 | | - chunksize = int(math.ceil(len(images_to_upload) / _NUM_THREADS)) |
914 | | - |
915 | | - tqdm_thread = threading.Thread( |
916 | | - target=__tqdm_thread_image_upload, |
917 | | - args=(len_img_paths, tried_upload, finish_event), |
918 | | - daemon=True |
| 645 | + list_of_uploaded, list_of_not_uploaded, duplicate_images = _upload_images( |
| 646 | + img_paths=img_paths, |
| 647 | + team_id=team_id, |
| 648 | + folder_id=folder_id, |
| 649 | + project_id=project_id, |
| 650 | + annotation_status=annotation_status, |
| 651 | + from_s3_bucket=from_s3_bucket, |
| 652 | + image_quality_in_editor=image_quality_in_editor, |
| 653 | + project=project, |
| 654 | + folder_name=folder_name |
919 | 655 | ) |
920 | | - tqdm_thread.start() |
921 | 656 |
922 | | - threads = [] |
923 | | - for thread_id in range(_NUM_THREADS): |
924 | | - t = threading.Thread( |
925 | | - target=__upload_images_to_aws_thread, |
926 | | - args=( |
927 | | - res, images_to_upload, project, annotation_status, prefix, |
928 | | - thread_id, chunksize, couldnt_upload, uploaded, tried_upload, |
929 | | - image_quality_in_editor, from_s3_bucket, folder_id |
930 | | - ), |
931 | | - daemon=True |
932 | | - ) |
933 | | - threads.append(t) |
934 | | - t.start() |
935 | | - for t in threads: |
936 | | - t.join() |
937 | | - finish_event.set() |
938 | | - tqdm_thread.join() |
939 | | - list_of_not_uploaded = [] |
940 | | - for couldnt_upload_thread in couldnt_upload: |
941 | | - for f in couldnt_upload_thread: |
942 | | - list_of_not_uploaded.append(str(f)) |
943 | | - list_of_uploaded = [] |
944 | | - for upload_thread in uploaded: |
945 | | - for f in upload_thread: |
946 | | - list_of_uploaded.append(str(f)) |
947 | | - |
948 | | - list_of_not_uploaded += images_to_skip |
949 | 657 | return (list_of_uploaded, list_of_not_uploaded, duplicate_images) |
950 | 658 |
951 | 659 |
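With the threading machinery gone, `upload_images_to_project` delegates everything to `_upload_images` from `.utils`. A caller-side sketch of that contract; the keyword signature is inferred from the call site in this diff, and the stub below stands in for the real implementation:

```python
# Stand-in for .utils._upload_images: pretend every path uploads cleanly and
# return (uploaded, not_uploaded, duplicates), as the call site expects.
def _upload_images(**kwargs):
    return [str(p) for p in kwargs["img_paths"]], [], []

uploaded, not_uploaded, duplicates = _upload_images(
    img_paths=["imgs/cat.jpg", "imgs/dog.jpg"],
    team_id=42,
    folder_id=99,
    project_id=1234,
    annotation_status=1,
    from_s3_bucket=None,                  # None: read images from local disk
    image_quality_in_editor="compressed",
    project={"id": 1234, "team_id": 42, "type": "Vector"},
    folder_name="root",
)
print(uploaded, not_uploaded, duplicates)
```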
@@ -1749,20 +1457,6 @@ def _upload_annotations_from_folder_to_project( |
1749 | 1457 | return (list_of_uploaded, list_of_not_uploaded, list_of_missing_images) |
1750 | 1458 |
1751 | 1459 |
1752 | | -def __tqdm_thread_image_upload(total_num, tried_upload, finish_event): |
1753 | | - with tqdm(total=total_num) as pbar: |
1754 | | - while True: |
1755 | | - finished = finish_event.wait(_TIME_TO_UPDATE_IN_TQDM) |
1756 | | - if not finished: |
1757 | | - sum_all = 0 |
1758 | | - for i in tried_upload: |
1759 | | - sum_all += len(i) |
1760 | | - pbar.update(sum_all - pbar.n) |
1761 | | - else: |
1762 | | - pbar.update(total_num - pbar.n) |
1763 | | - break |
1764 | | - |
1765 | | - |
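The removed progress reporter is a generic pattern: a daemon thread polls the workers' shared `tried_upload` lists once per second (`_TIME_TO_UPDATE_IN_TQDM`) and drives a single tqdm bar, snapping to 100% when the finish event fires. A self-contained sketch:

```python
import threading
import time
from tqdm import tqdm

def progress_thread(total, tried, finish_event, poll_seconds=1):
    # Poll the workers' shared lists until the finish event fires, advance
    # the bar by the delta since the last poll, then snap to 100%.
    with tqdm(total=total) as pbar:
        while not finish_event.wait(poll_seconds):
            done = sum(len(chunk) for chunk in tried)
            pbar.update(done - pbar.n)
        pbar.update(total - pbar.n)

tried, finish = [[], []], threading.Event()
t = threading.Thread(target=progress_thread, args=(4, tried, finish), daemon=True)
t.start()
for i in range(4):
    tried[i % 2].append(i)   # simulate two upload workers making progress
    time.sleep(0.3)
finish.set()
t.join()
```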
1766 | 1460 | def __tqdm_thread_upload_annotations( |
1767 | 1461 | total_num, uploaded, couldnt_upload, missing_image, finish_event |
1768 | 1462 | ): |