diff --git a/deployments/anvilprod/environment.py b/deployments/anvilprod/environment.py
index 054425bd6d..c0fb259b5a 100644
--- a/deployments/anvilprod/environment.py
+++ b/deployments/anvilprod/environment.py
@@ -1323,6 +1323,7 @@ def env() -> Mapping[str, str | None]:
         'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-prod.broadinstitute.org',
         'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-prod.broadinstitute.org',
         'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-prod.broadinstitute.org',
+        'AZUL_TERRA_BILLING_PROJECT': 'terra-aae33465',
         'azul_ecm_service_url': 'https://externalcreds.dsde-prod.broadinstitute.org',
 
         'AZUL_ENABLE_MONITORING': '1',
diff --git a/deployments/hammerbox/environment.py b/deployments/hammerbox/environment.py
index 71b24db322..f9428a05da 100644
--- a/deployments/hammerbox/environment.py
+++ b/deployments/hammerbox/environment.py
@@ -1337,6 +1337,7 @@ def env() -> Mapping[str, str | None]:
         'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-prod.broadinstitute.org',
         'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-prod.broadinstitute.org',
         'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-prod.broadinstitute.org',
+        'AZUL_TERRA_BILLING_PROJECT': 'terra-aae33465',
         'azul_ecm_service_url': 'https://externalcreds.dsde-prod.broadinstitute.org',
 
         # Personal deployments & `hammerbox` share an ES domain with `anvilprod`
diff --git a/environment.py b/environment.py
index 06195b3339..bb6e984afa 100644
--- a/environment.py
+++ b/environment.py
@@ -744,6 +744,12 @@ def env() -> Mapping[str, str | None]:
         #
         'AZUL_TERRA_SERVICE_URL': None,
 
+        # The Google Project ID associated with the Terra workspace to charge
+        # for file downloads while mirroring. If left unset, Terra pays the
+        # egress cost for the downloads.
+        #
+        'AZUL_TERRA_BILLING_PROJECT': None,
+
         # OAuth2 Client ID to be used for authenticating users. See section
         # 3.2 of the README
         #
diff --git a/scripts/scratch_7.py b/scripts/scratch_7.py
new file mode 100644
index 0000000000..bf1d2f015d
--- /dev/null
+++ b/scripts/scratch_7.py
@@ -0,0 +1,63 @@
+"""
+Manually exercises the download of a single, hard-coded file through the
+mirror service of the current deployment.
+"""
+from azul import config
+from azul.indexer.mirror_service import MirrorWorkerService
+from azul.service import Filters
+from azul.service.index_service import IndexService
+
+
+def download(catalog, source_id, file_uuid):
+    """
+    Look up the given file from the given source in the index of the given
+    catalog, download it using the mirror service and print the number of
+    bytes downloaded.
+    """
+    index_service = IndexService()
+    mirror_service = MirrorWorkerService(catalog=catalog, schema_url_func=None)
+    file = index_service.get_data_file(catalog=catalog,
+                                       file_uuid=file_uuid,
+                                       file_version=None,
+                                       filters=Filters(explicit={},
+                                                       source_ids={source_id}))
+    assert file is not None
+
+    data = mirror_service._download(file, part=None)
+    print('Downloaded', len(data), 'bytes')
+
+
+sandbox_args = {
+    'source_id': 'b1083e8b-4de9-467a-97de-18179c4e6bd1',
+    'file_uuid': '60e25442-aba0-4934-af42-be0d536112de'
+}
+
+hammerbox_args = {
+    'source_id': 'b3b5fbcb-583d-4894-90bc-19abe85a0f4f',
+    'file_uuid': '5a795c00-3df1-468d-b4a0-2e7fe048b6d4'
+}
+
+
+def main():
+    """
+    Download the file that is hard-coded for the current deployment.
+    """
+    args_by_deployment = {
+        'sandbox': sandbox_args,
+        'hammerbox': hammerbox_args
+    }
+    deployment = config.deployment.name
+    try:
+        args = args_by_deployment[deployment]
+    except KeyError:
+        # An assertion would be stripped under `python -O`, leaving `args`
+        # unbound, so raise explicitly
+        raise RuntimeError('Unsupported deployment', deployment) from None
+
+    download(
+        catalog=config.default_catalog,
+        **args
+    )
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/azul/__init__.py b/src/azul/__init__.py
index 32d71a5e31..5976cbc495 100644
--- a/src/azul/__init__.py
+++ b/src/azul/__init__.py
@@ -329,6 +329,14 @@ def terra_service_url(self) -> mutable_furl:
     def ecm_service_url(self) -> mutable_furl:
         return mutable_furl(self.environ['azul_ecm_service_url'])
 
+    @property
+    def terra_billing_project(self) -> str | None:
+        """
+        The ID of the Google project to charge for file downloads while
+        mirroring, or None if no billing project is configured.
+        """
+        return self.environ.get('AZUL_TERRA_BILLING_PROJECT')
+
     @property
     def dss_query_prefix(self) -> str:
         return self.environ.get('AZUL_DSS_QUERY_PREFIX', '')
diff --git a/src/azul/drs.py b/src/azul/drs.py
index f1cc29e87e..8d5e07d6a9 100644
--- a/src/azul/drs.py
+++ b/src/azul/drs.py
@@ -356,15 +356,25 @@ class DRSObject:
     _http_client: HttpClient
     _url: furl
 
-    def get(self, access_method: AccessMethod = AccessMethod.https) -> Access:
+    def get(self,
+            access_method: AccessMethod = AccessMethod.https,
+            access_headers: Mapping[str, str] | None = None
+            ) -> Access:
         """
         Returns access to the content of the data object identified by the
         given URI. The scheme of the URL in the returned access object depends
         on the access method specified.
+
+        :param access_headers: optional HTTP headers to send with requests that
+                               resolve an access ID to a URL. They are not sent
+                               when requesting the DRS object itself.
         """
-        return self._get(access_method)
+        return self._get(access_method, access_headers)
 
-    def _get(self, access_method: AccessMethod) -> Access:
+    def _get(self,
+             access_method: AccessMethod,
+             access_headers: Mapping[str, str] | None
+             ) -> Access:
         url = self._url
         while True:
             response = self._request(url)
@@ -384,9 +394,9 @@ def _get(self, access_method: AccessMethod) -> Access:
                     # https://github.com/ga4gh/data-repository-service-schemas/issues/361
                     assert access_method is AccessMethod.gs, R(
                         'Unexpected access method', access_method)
-                    return self._get_access(access_id, AccessMethod.https)
+                    return self._get_access(access_id, AccessMethod.https, access_headers)
                 elif access_id is not None:
-                    return self._get_access(access_id, access_method)
+                    return self._get_access(access_id, access_method, access_headers)
                 elif access_url is not None:
                     scheme = furl(access_url['url']).scheme
                     assert scheme == access_method.scheme, R(
@@ -403,11 +413,15 @@ def _get(self, access_method: AccessMethod) -> Access:
             else:
                 raise DRSStatusException(url, response)
 
-    def _get_access(self, access_id: str, access_method: AccessMethod) -> Access:
+    def _get_access(self,
+                    access_id: str,
+                    access_method: AccessMethod,
+                    access_headers: Mapping[str, str] | None
+                    ) -> Access:
         url = self._url.copy()
         url.path.add(['access', access_id])
         while True:
-            response = self._request(url)
+            response = self._request(url, headers=access_headers)
             if response.status == 200:
                 response_data = json_dict(json.loads(response.data))
                 scheme = furl(json_str(response_data['url'])).scheme
@@ -426,8 +440,8 @@ def _get_access(self, access_id: str, access_method: AccessMethod) -> Access:
             else:
                 raise DRSStatusException(url, response)
 
-    def _request(self, url: furl) -> urllib3.BaseHTTPResponse:
-        return self._http_client.request('GET', str(url), redirect=False)
+    def _request(self, url: furl, **kwargs) -> urllib3.BaseHTTPResponse:
+        return self._http_client.request('GET', str(url), **kwargs, redirect=False)
 
 
 class DRSStatusException(Exception):
diff --git a/src/azul/indexer/mirror_service.py b/src/azul/indexer/mirror_service.py
index 32cbf69e02..0af1d9c79a 100644
--- a/src/azul/indexer/mirror_service.py
+++ b/src/azul/indexer/mirror_service.py
@@ -749,7 +749,14 @@ def _repository_url(self, file: File) -> furl:
         assert file.drs_uri is not None, R(
             'File cannot be downloaded', file)
         object = self.repository_plugin.drs_object(file.drs_uri)
-        access = object.get(AccessMethod.gs)
+        billing_project = config.terra_billing_project
+        if billing_project is not None:
+            # Charge the download to the configured billing project
+            access_headers = {'x-user-project': billing_project}
+        else:
+            # Without a billing project, Terra pays the egress cost
+            access_headers = None
+        access = object.get(AccessMethod.gs, access_headers)
         assert access.method is AccessMethod.https, access
         return furl(access.url)
 