3838 get_project_metadata_with_users
3939)
4040from .users import get_team_contributor_metadata
41- from .utils import _get_upload_auth_token , _get_boto_session_by_credentials , _upload_images , create_empty_annotation , upload_image_array_to_s3 , get_image_array_to_upload
41+ from .utils import _get_upload_auth_token , _get_boto_session_by_credentials , _upload_images , _attach_urls
42+ from tqdm import tqdm
4243
44+ _NUM_THREADS = 10
45+ _TIME_TO_UPDATE_IN_TQDM = 1
4346logger = logging .getLogger ("superannotate-python-sdk" )
4447
4548_api = API .get_instance ()
@@ -690,7 +693,6 @@ def attach_image_urls_to_project(
690693 :return: list of linked image names, list of failed image names, list of duplicate image names
691694 :rtype: tuple
692695 """
693-
694696 project , folder = get_project_and_folder_metadata (project )
695697 folder_name = project ["name" ] + (f'/{ folder ["name" ]} ' if folder else "" )
696698 upload_state = common .upload_state_int_to_str (project .get ("upload_state" ))
@@ -703,166 +705,30 @@ def attach_image_urls_to_project(
703705 team_id , project_id = project ["team_id" ], project ["id" ]
704706 image_data = pd .read_csv (attachments , dtype = str )
705707 image_data = image_data [~ image_data ["url" ].isnull ()]
706- existing_names = image_data [~ image_data ["name" ].isnull ()]
707- duplicate_idx_csv = existing_names .duplicated (subset = "name" , keep = "first" )
708- duplicate_images = existing_names [duplicate_idx_csv ]["name" ].tolist ()
709- existing_names = existing_names [~ duplicate_idx_csv ]
710- existing_images = search_images ((project , folder ))
711- duplicate_idx = []
712708 for ind , _ in image_data [image_data ["name" ].isnull ()].iterrows ():
713- while True :
714- name_try = str (uuid .uuid4 ())
715- if name_try not in existing_images :
716- image_data .at [ind , "name" ] = name_try
717- existing_images .append (name_try )
718- break
719- image_data .drop_duplicates (subset = "name" , keep = "first" , inplace = True )
720- for ind , row in existing_names .iterrows ():
721- if row ["name" ] in existing_images :
722- duplicate_idx .append (ind )
723- duplicate_images .extend (image_data .loc [duplicate_idx ]["name" ].tolist ())
724- image_data .drop (labels = duplicate_idx , inplace = True )
725- if len (duplicate_images ) != 0 :
726- logger .warning (
727- "%s already existing images found that won't be uploaded." ,
728- len (duplicate_images )
729- )
709+ name_try = str (uuid .uuid4 ())
710+ image_data .at [ind , "name" ] = name_try
730711 image_data = pd .DataFrame (image_data , columns = ["name" , "url" ])
731712 img_names_urls = image_data .values .tolist ()
732- logger .info (
733- "Uploading %s images to project %s." , len (img_names_urls ), folder_name
734- )
735- if len (img_names_urls ) == 0 :
736- return ([], [], duplicate_images )
737713
738714 if folder :
739715 folder_id = folder ["id" ]
740716 else :
741717 folder_id = get_project_root_folder_id (project )
742718
743- params = {'team_id' : team_id , 'folder_id' : folder_id }
744- uploaded = [[] for _ in range (_NUM_THREADS )]
745- tried_upload = [[] for _ in range (_NUM_THREADS )]
746- couldnt_upload = [[] for _ in range (_NUM_THREADS )]
747- finish_event = threading .Event ()
748-
749- res = _get_upload_auth_token (params = params , project_id = project_id )
750-
751- prefix = res ['filePath' ]
752- limit = res ['availableImageCount' ]
753- images_to_upload = img_names_urls [:limit ]
754- images_to_skip = img_names_urls [limit :]
755- chunksize = int (math .ceil (len (images_to_upload ) / _NUM_THREADS ))
756-
757- tqdm_thread = threading .Thread (
758- target = __tqdm_thread_image_upload ,
759- args = (len (images_to_upload ), tried_upload , finish_event ),
760- daemon = True
719+ list_of_uploaded , list_of_not_uploaded , duplicate_images = _attach_urls (
720+ img_names_urls = img_names_urls ,
721+ team_id = team_id ,
722+ folder_id = folder_id ,
723+ project_id = project_id ,
724+ annotation_status = annotation_status ,
725+ project = project ,
726+ folder_name = folder_name
761727 )
762- tqdm_thread .start ()
763- threads = []
764- for thread_id in range (_NUM_THREADS ):
765- t = threading .Thread (
766- target = __attach_image_urls_to_project_thread ,
767- args = (
768- res , images_to_upload , project , annotation_status , prefix ,
769- thread_id , chunksize , couldnt_upload , uploaded , tried_upload ,
770- folder_id
771- ),
772- daemon = True
773- )
774- threads .append (t )
775- t .start ()
776- for t in threads :
777- t .join ()
778- finish_event .set ()
779- tqdm_thread .join ()
780- list_of_not_uploaded = []
781- for couldnt_upload_thread in couldnt_upload :
782- for f in couldnt_upload_thread :
783- list_of_not_uploaded .append (str (f ))
784- list_of_uploaded = []
785- for upload_thread in uploaded :
786- for f in upload_thread :
787- list_of_uploaded .append (str (f ))
788728
789- list_of_not_uploaded += [i [0 ] for i in images_to_skip ]
790729 return (list_of_uploaded , list_of_not_uploaded , duplicate_images )
791730
792731
793- def __attach_image_urls_to_project_thread (
794- res , img_names_urls , project , annotation_status , prefix , thread_id ,
795- chunksize , couldnt_upload , uploaded , tried_upload , project_folder_id
796- ):
797- len_img_paths = len (img_names_urls )
798- start_index = thread_id * chunksize
799- end_index = start_index + chunksize
800- if start_index >= len_img_paths :
801- return
802- s3_session = _get_boto_session_by_credentials (res )
803- s3_resource = s3_session .resource ('s3' )
804- bucket = s3_resource .Bucket (res ["bucket" ])
805- prefix = res ['filePath' ]
806- uploaded_imgs = []
807- uploaded_imgs_info = ([], [], [])
808- for i in range (start_index , end_index ):
809- if i >= len_img_paths :
810- break
811- name , _ = img_names_urls [i ]
812- tried_upload [thread_id ].append (name )
813- img_name_hash = str (uuid .uuid4 ()) + Path (name ).suffix
814- key = prefix + img_name_hash
815- try :
816- bucket .put_object (
817- Body = json .dumps (create_empty_annotation ((None , None ), name )),
818- Key = key + ".json"
819- )
820- except Exception as e :
821- logger .warning ("Unable to upload image %s. %s" , name , e )
822- couldnt_upload [thread_id ].append (name )
823- continue
824- else :
825- uploaded_imgs .append (name )
826- uploaded_imgs_info [0 ].append (img_names_urls [i ])
827- uploaded_imgs_info [1 ].append (key )
828- uploaded_imgs_info [2 ].append ((None , None ))
829- if len (uploaded_imgs ) >= 100 :
830- try :
831- __create_image (
832- uploaded_imgs_info [0 ],
833- uploaded_imgs_info [1 ],
834- project ,
835- annotation_status ,
836- prefix ,
837- uploaded_imgs_info [2 ],
838- project_folder_id ,
839- upload_state = "External"
840- )
841- except SABaseException as e :
842- couldnt_upload [thread_id ] += uploaded_imgs
843- logger .warning (e )
844- else :
845- uploaded [thread_id ] += uploaded_imgs
846- uploaded_imgs = []
847- uploaded_imgs_info = ([], [], [])
848- try :
849- __create_image (
850- uploaded_imgs_info [0 ],
851- uploaded_imgs_info [1 ],
852- project ,
853- annotation_status ,
854- prefix ,
855- uploaded_imgs_info [2 ],
856- project_folder_id ,
857- upload_state = "External"
858- )
859- except SABaseException as e :
860- couldnt_upload [thread_id ] += uploaded_imgs
861- logger .warning (e )
862- else :
863- uploaded [thread_id ] += uploaded_imgs
864-
865-
866732def upload_images_from_public_urls_to_project (
867733 project ,
868734 img_urls ,
0 commit comments