Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions deployments/anvilprod/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,7 @@ def env() -> Mapping[str, str | None]:
'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-prod.broadinstitute.org',
'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-prod.broadinstitute.org',
'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-prod.broadinstitute.org',
'AZUL_TERRA_BILLING_PROJECT': 'terra-aae33465',
'azul_ecm_service_url': 'https://externalcreds.dsde-prod.broadinstitute.org',

'AZUL_ENABLE_MONITORING': '1',
Expand Down
1 change: 1 addition & 0 deletions deployments/hammerbox/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,7 @@ def env() -> Mapping[str, str | None]:
'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-prod.broadinstitute.org',
'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-prod.broadinstitute.org',
'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-prod.broadinstitute.org',
'AZUL_TERRA_BILLING_PROJECT': 'terra-aae33465',
'azul_ecm_service_url': 'https://externalcreds.dsde-prod.broadinstitute.org',

# Personal deployments & `hammerbox` share an ES domain with `anvilprod`
Expand Down
6 changes: 6 additions & 0 deletions environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,12 @@ def env() -> Mapping[str, str | None]:
#
'AZUL_TERRA_SERVICE_URL': None,

# The Google Project ID associated with the Terra workspace to charge
# for file downloads while mirroring. If left unset, Terra pays the
# egress cost for the downloads.
#
'AZUL_TERRA_BILLING_PROJECT': None,

# OAuth2 Client ID to be used for authenticating users. See section
# 3.2 of the README
#
Expand Down
49 changes: 49 additions & 0 deletions scripts/scratch_7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from azul import config
from azul.indexer.mirror_service import MirrorWorkerService
from azul.service import Filters
from azul.service.index_service import IndexService


def download(catalog, source_id, file_uuid):
    """
    Look up one data file in the index and download it through the mirror
    service, printing the length of the downloaded data.

    :param catalog: the catalog in which to look up the file and for which
                    the mirror service is instantiated

    :param source_id: the ID of the source the lookup is restricted to

    :param file_uuid: the UUID of the file to download. No particular version
                      of the file is requested.
    """
    indexer = IndexService()
    mirror = MirrorWorkerService(catalog=catalog, schema_url_func=None)
    filters = Filters(explicit={}, source_ids={source_id})
    data_file = indexer.get_data_file(catalog=catalog,
                                      file_uuid=file_uuid,
                                      file_version=None,
                                      filters=filters)
    assert data_file is not None

    # No part is specified for the download
    content = mirror._download(data_file, part=None)
    print('Downloaded', len(content), 'bytes')


# Source and file to download when the current deployment is `sandbox`
sandbox_args = dict(
    source_id='b1083e8b-4de9-467a-97de-18179c4e6bd1',
    file_uuid='60e25442-aba0-4934-af42-be0d536112de'
)

# Source and file to download when the current deployment is `hammerbox`
hammerbox_args = dict(
    source_id='b3b5fbcb-583d-4894-90bc-19abe85a0f4f',
    file_uuid='5a795c00-3df1-468d-b4a0-2e7fe048b6d4'
)


def main():
deployment = config.deployment.name
match deployment:
case 'sandbox':
args = sandbox_args
case 'hammerbox':
args = hammerbox_args
case _:
assert False, deployment

download(
catalog=config.default_catalog,
**args

Check failure

Code scanning / CodeQL

Potentially uninitialized local variable Error

Local variable 'args' may be used before it is initialized.
)


# Only run the download when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
4 changes: 4 additions & 0 deletions src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,10 @@ def terra_service_url(self) -> mutable_furl:
def ecm_service_url(self) -> mutable_furl:
return mutable_furl(self.environ['azul_ecm_service_url'])

@property
def terra_billing_project(self) -> str | None:
return self.environ.get('AZUL_TERRA_BILLING_PROJECT')

@property
def dss_query_prefix(self) -> str:
    """
    The value of `AZUL_DSS_QUERY_PREFIX` in this configuration's environment,
    or the empty string if there is no such entry.
    """
    try:
        return self.environ['AZUL_DSS_QUERY_PREFIX']
    except KeyError:
        return ''
Expand Down
28 changes: 19 additions & 9 deletions src/azul/drs.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,15 +356,21 @@ class DRSObject:
_http_client: HttpClient
_url: furl

def get(self,
        access_method: AccessMethod = AccessMethod.https,
        access_headers: Mapping[str, str] | None = None
        ) -> Access:
    """
    Obtain access to the content of the data object that this instance
    refers to. The access method specified determines the scheme of the URL
    in the access object that is returned.

    :param access_method: the method by which the content is to be accessed

    :param access_headers: optional headers, passed on unchanged to `_get`
                           along with the access method
    """
    return self._get(access_method=access_method,
                     access_headers=access_headers)

def _get(self, access_method: AccessMethod) -> Access:
def _get(self,
access_method: AccessMethod,
access_headers: Mapping[str, str] | None
) -> Access:
url = self._url
while True:
response = self._request(url)
Expand All @@ -384,9 +390,9 @@ def _get(self, access_method: AccessMethod) -> Access:
# https://github.com/ga4gh/data-repository-service-schemas/issues/361
assert access_method is AccessMethod.gs, R(
'Unexpected access method', access_method)
return self._get_access(access_id, AccessMethod.https)
return self._get_access(access_id, AccessMethod.https, access_headers)
elif access_id is not None:
return self._get_access(access_id, access_method)
return self._get_access(access_id, access_method, access_headers)
elif access_url is not None:
scheme = furl(access_url['url']).scheme
assert scheme == access_method.scheme, R(
Expand All @@ -403,11 +409,15 @@ def _get(self, access_method: AccessMethod) -> Access:
else:
raise DRSStatusException(url, response)

def _get_access(self, access_id: str, access_method: AccessMethod) -> Access:
def _get_access(self,
access_id: str,
access_method: AccessMethod,
access_headers: Mapping[str, str] | None
) -> Access:
url = self._url.copy()
url.path.add(['access', access_id])
while True:
response = self._request(url)
response = self._request(url, headers=access_headers)
if response.status == 200:
response_data = json_dict(json.loads(response.data))
scheme = furl(json_str(response_data['url'])).scheme
Expand All @@ -426,8 +436,8 @@ def _get_access(self, access_id: str, access_method: AccessMethod) -> Access:
else:
raise DRSStatusException(url, response)

def _request(self, url: furl, **kwargs) -> urllib3.BaseHTTPResponse:
    """
    Issue a GET request for the given URL via this object's HTTP client,
    always passing `redirect=False`.

    :param url: the URL to request

    :param kwargs: additional keyword arguments that are passed through to
                   the client's `request` method
    """
    client = self._http_client
    return client.request('GET', str(url), redirect=False, **kwargs)


class DRSStatusException(Exception):
Expand Down
7 changes: 6 additions & 1 deletion src/azul/indexer/mirror_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,12 @@ def _repository_url(self, file: File) -> furl:
assert file.drs_uri is not None, R(
'File cannot be downloaded', file)
object = self.repository_plugin.drs_object(file.drs_uri)
access = object.get(AccessMethod.gs)
billing_project = config.terra_billing_project
if billing_project is not None:
access_headers = {'x-user-project': billing_project}
else:
access_headers = None
access = object.get(AccessMethod.gs, access_headers)
assert access.method is AccessMethod.https, access
return furl(access.url)

Expand Down
Loading