diff --git a/.gitmodules b/.gitmodules
index cbe223f721..e69de29bb2 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "terraform/gitlab/vpn/easy-rsa"]
-	path = terraform/gitlab/vpn/easy-rsa
-	url = https://github.com/OpenVPN/easy-rsa.git
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
deleted file mode 100644
index ee64870bdb..0000000000
--- a/CONTRIBUTING.rst
+++ /dev/null
@@ -1,1236 +0,0 @@
############
Contributing
############

This document contains guidelines that every contributor to this project
should follow. We call them guidelines as opposed to rules because, well, life
happens. If a contributor disregards a guideline, they should have a good
reason for that and should be considered accountable for the consequences.

.. sectnum::
   :depth: 2
   :suffix: .

.. contents::


Code Style
==========

PEP
---

* For Python we use PEP8 with E722 (do not use bare ``except``) disabled and
  the maximum line length set to 120 characters.

Line length
-----------

* For prose (documentation, comments) wrap lines at the word boundary closest
  to or at, but not beyond, column 79. The first column is column 0.

* For code, we keep the trimmed line length under 81. A trimmed line is a line
  in the source with leading and trailing whitespace removed. This means a
  line may be indented by 40 characters and contain 80 characters after that.
  This rule is designed to keep code readable without forcing excessive
  wrapping for more deeply nested control flow constructs.

String literals
---------------

* We prefer single quoted string literals. We used to use double quotes in
  JSON literals but that convention is now deprecated and all new string
  literals are single quoted except as noted below.

* We don't escape a quote character within string literals. When a string
  literal contains a single quote, that literal is delimited by double quotes
  (and vice versa). https://www.python.org/dev/peps/pep-0008/#string-quotes

Collection literals
-------------------

* When deciding between list and tuple literals use the following rule of
  thumb: Tuples are typically heterogeneous—their members usually have
  different types—and immutable and therefore of a fixed length. In other
  languages, tuples are referred to as *structs* or *records*. Lists are
  *usually* homogeneous and of varying length. Use the type of literal that
  most closely matches either side of this distinction. For example, in

  ::

      for first, last, age in [
          ('John', 'Doe', 33),
          ('Jane', 'Foe', 44)
      ]:
          print(last, first, age)

  a list of tuples is the most obvious choice. Each list element is of the
  same type ``Tuple[str, str, int]``. To add a tuple element, one would have
  to change more code. To change a list element one wouldn't have to do
  anything else. In other words, the variability in length resembles a list.
  While

  ::

      for first, last, age in (
          ['John', 'Doe', 33],
          ['Jane', 'Foe', 44]
      ):
          print(last, first, age)

  would work just as well, it *looks* confusing.

Line wrapping and indentation
-----------------------------

* We prefer aligned indent for wrapped constructs except for collection
  literals, collection comprehensions and generator expressions::

      self.call_me(positional,
                   x=1,
                   y=2)

      foo = {
          'foo': False,
          'a': [1, 2, 3]
      }

      bar = {
          k.upper(): v.lower()
          for k, v in d.items()
          if k.startswith('x')
      }

* Small literal collections may be kept on one line up to the maximum line
  length. A small collection is one that has no more than 9 elements, all of
  which are either primitive values or other small collections.
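  For example, these hypothetical literals both qualify as small and may stay
  on one line::

      primes = [2, 3, 5, 7, 11]
      person = ('Jane', 'Foe', 44)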
* We wrap all elements or none. Instead of ::

      self.call_me(foo, bar,
                   x=1, y=2)

  we use ::

      self.call_me(foo,
                   bar,
                   x=1,
                   y=2)

  The exceptions to this rule are logging method invocations and calls to
  ``reject()`` and ``require()``::

      logger.info('Waiting up to %s seconds for %s queues to %s ...',
                  timeout, len(queues), 'empty' if empty else 'not be empty')

      reject(spline not in reticulated_splines,
             'Unreticulated splines cause discombobulation.')

  Only if the second and subsequent arguments won't fit on one line do we
  wrap all arguments, one line per argument.

Trailing commas
---------------

* We don't yet use trailing commas in enumerations to optimize diffs. [#]_

.. [#] Note: If we were to adopt trailing commas, we would also have to
   abandon our preference for aligned indent.

Backslashes
-----------

* We avoid the use of backslash for continuing statements beyond one line.
  Instead, we exploit the fact that Python can infer continuation when line
  breaks occur inside balanced constructs like brackets or parentheses. If
  necessary we introduce a pair of parentheses around the wrapped expression.

  With some keywords it is impossible to add semantically insignificant
  parentheses. For example, ``assert foo, 'bad'`` is not equivalent to
  ``assert (foo, 'bad')``. In these exceptional situations it is permissible
  to use backslash for line continuation.

String interpolation
--------------------

* When interpolating strings into human-readable strings like log or
  exception messages, we use the ``!r`` format modifier (as in
  ``f'foo is {foo!r}'``) or ``%r`` in log messages. This automatically adds
  quotes around the interpolated string.

* Except for log messages (see below), we don't use the ``%`` operator or the
  ``str.format()`` method. We use ``f''`` strings or string concatenation.
  When choosing between the latter two, we use the one that yields the
  shortest expression. When both alternatives yield an expression of equal
  length, we prefer string concatenation::

      f'{a}{b}'             # Simple concatenation of variables
      a + b                 # tends to be longer with f'' strings

      a + str(b)            # {} calls str implicitly so f'' strings win
      f'{a}{b}'             # if any of the variables is not a string

      a + ' ' + b + '.tsv'  # When multiple literal strings are involved
      f'{a} {b}.tsv'        # f'' strings usually yield shorter expressions

String concatenation
--------------------

* We use ``str.join()`` when joining more than three elements with the same
  character or when the elements are already in an iterable form::

      f'{a},{b},{c},{d}'      # while this is shorter
      ','.join((a, b, c, d))  # this is more readable

      f'{a[0]},{a[1]}'        # this is noisy and tedious
      ','.join(a)             # this is not

* We use `EAFP`_ as a principle.

.. _EAFP: https://stackoverflow.com/questions/11360858/what-is-the-eafp-principle-in-python

Variable names
--------------

* We don't use all upper case (all-caps) names for pseudo constants::

      CONSTANT_FOO = 'value_bar'  # bad
      constant_foo = 'value_bar'  # better

* The names of type variables are not necessarily limited to one character
  but we do use all-caps for them.
  In particular, names of bounded type variables should be more than a single
  character long, for example::

      SOURCE_REF = TypeVar('SOURCE_REF', bound='SourceRef')

* To name variables referencing a mapping like ``dict``, ``frozendict`` or
  ``Counter`` we prefer the ``values_by_key`` or ``key_to_value`` convention.

* The smaller the scope, the shorter the variable names we use. In ::

      def reticulate_splines(splines_to_reticulate):
          spline_reticulator = SplineReticulator()
          reticulated_splines = spline_reticulator.reticulate(splines_to_reticulate)
          return reticulated_splines

  the ``spline`` aspect is implied by the context provided by the method name
  so it can be omitted in the body::

      def reticulate_splines(splines):
          reticulator = SplineReticulator()
          splines = reticulator.reticulate(splines)
          return splines

  You catch my drift. Also note the reassignment.

* For tiny scopes like comprehensions, we even use single letter variable
  names if it's clear from the context what they mean::

      {k: str(v) for k, v in numeric_splines.items()}
      [i * reticulate(s) for i, s in enumerate(numeric_splines.values())]

  We prefer ``k`` and ``v`` for mapping keys and values, and ``i`` for
  counters.

Logging
-------

* Loggers are instantiated in every module that needs to log.

* Loggers are always instantiated as follows::

      log = logging.getLogger(__name__)

* At program entry points we use the appropriate configuration method from
  ``azul.logging``. Program entry points are

  - in scripts::

        if __name__ == '__main__':
            configure_script_logging(log)

  - in test modules::

        def setUpModule():
            configure_test_logging(log)

  - in ``app.py``::

        log = logging.getLogger(__name__)
        app = AzulChaliceApp(app_name=config.indexer_name)
        configure_app_logging(app, log)

* We don't use ``f''`` strings or string concatenation when interpolating
  dynamic values into log messages::

      log.info(f'Foo is {bar}')   # don't do this
      log.info('Foo is %s', bar)  # do this

* Computationally expensive interpolations should be guarded::

      if log.isEnabledFor(logging.DEBUG):
          log.debug('Foo is %s', json.dumps(giant, indent=4))

* Log and exception messages should not end in a period unless the message
  contains multiple sentences. If it does, all sentences in the message
  should end in a period, including a period at the end of the string.

Imports
-------

* We prefer absolute imports.

* We sort imports first by category, then lexicographically by module name
  and then by imported symbol. The categories are

  1. Imports of modules in the Python runtime

  2. Imports of modules in external dependencies of the project

  3. Imports of modules in the project

* To minimize diffs and reduce the potential for merge conflicts, only one
  symbol may be imported per line. When using ``from`` imports, all imported
  symbols must be wrapped in parentheses, indented, and the last symbol must
  have a trailing comma. Note that this applies even if only *one* symbol is
  imported. Thus, assuming that ``foo`` and ``bar`` are from the same
  category, ::

      import foo
      from foo import (
          glue,
          shoe,
      )
      import bar
      from bar import (
          far,
      )

  is the *only* correct sequence of import statements for these symbols.

* We carefully selected the ordering criteria to match those implemented by
  PyCharm.
  PyCharm's *Optimize Imports* feature should be the preferred method of
  resolving import statement ordering violations, as the line numbers
  reported by our flake8 plugin are not always optimal in illuminating the
  nature of the violations.

* The one violation *not* addressable via PyCharm is our requirement that
  single-symbol ``from`` imports be wrapped the same as multi-symbol ones.
  Currently, this must be corrected manually. Vim users may find the
  following macro convenient for this purpose:
  ::

      ^3Wi(<CR><Esc>A,<CR>)<Esc>

Comments
--------

* We don't use inline comments to explain what should be obvious to software
  engineers familiar with the project. To help new contributors become
  familiar, we document the project architecture and algorithms separately
  from the Python source code in a ``docs`` subdirectory of the project root.

* When there is the need to explain in the source, we focus on the Why rather
  than the How.


Inline Documentation
--------------------

* We use docstrings to document the purpose of an artifact (module, class,
  function or method), and its contract with client code using it. We don't
  specify implementation details in docstrings.

* We put the triple quotes that delimit docstrings on separate lines::

      def foo():
          """
          Does nothing.
          """
          pass

  This visually separates function signature, docstring and function body
  from each other.

* Any method or function whose purpose isn't obvious by examining its
  signature (name, argument names and type hints, return type hint) should be
  documented in a docstring.

* Every external-facing API must have a docstring. An external-facing API is
  a class, function, method, attribute or constant that's exposed via Chalice
  or—if we ever were to release a library for use by other developers—exposed
  in that library.


Code duplication
----------------

* We avoid duplication of code and continually refactor it with the goals of
  reducing entropy while increasing consistency and reuse.

Consistency and precedent
-------------------------

* We try to follow existing precedent: we emulate what people did before us
  unless there is a good reason not to do so. Taste and preference are not
  good reasons because those differ from person to person.

  If resolving an issue requires touching a section of code that consistently
  violates the guidelines laid out herein, we either

  a) follow the precedent and introduce another violation or

  b) change the entire section to be compliant with the guidelines.

  Both are acceptable. We weigh the cost of extending the scope of our
  current work against the impact of perpetuating a problem. If we decide to
  make the section compliant, we do so in a separate commit. That commit
  should not introduce semantic changes and it should precede the commit that
  resolves the issue.

Ordering artifacts in the source
--------------------------------

* We generally use top-down ordering of artifacts within a module or script.
  Helper and utility artifacts succeed the code that uses them. Bottom-up
  ordering—which has the elementary building blocks occur first—makes it
  harder to determine the purpose and intent of a module at a glance.

Disabling sections of code
--------------------------

* We do not comment out sections of code. To temporarily disable some section
  of code, we embed it in a conditional statement with a test that always
  evaluates to ``False``. We do this to keep the code subject to refactorings
  and type checkers. We use ``azul.false()`` instead of just the ``False``
  literal, in order to avoid the detection of unreachable code during static
  analysis by PyCharm and CodeQL.
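  A minimal sketch of this pattern, assuming ``false`` can be imported from
  the ``azul`` package and that ``reticulate_splines()`` stands in for the
  disabled section::

      from azul import false

      if false():
          # Temporarily disabled, but still visible to refactoring
          # tools and type checkers
          reticulate_splines()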
Control flow
------------

* We avoid using bail-out statements like ``continue``, ``return`` and
  ``break`` unless not using them would require duplicating code, increase
  the complexity of the control flow or cause an excessive degree of nesting.

  Examples from the limited set of cases in which bail-outs are preferred::

      while True:
          <setup>
          if <condition>:
              break
          <body>

  can be unrolled into

  ::

      <setup>
      while not <condition>:
          <body>
          <setup>

  but that requires duplicating the ``<setup>`` section. In this case the use
  of ``break`` is preferred.

  Similarly,

  ::

      while <condition>:
          if not <condition1>:
              <body1>
              if not <condition2>:
                  <body2>
                  if not <condition3>:
                      <body3>

  can be rewritten as

  ::

      while <condition>:
          if <condition1>:
              continue
          <body1>
          if <condition2>:
              continue
          <body2>
          if <condition3>:
              continue
          <body3>

  This eliminates the nesting which may in turn require fewer wrapped lines
  in the ``<bodyN>`` sections, leading to increased readability.

* We add ``else`` for clarity even if its use isn't semantically required::

      if <condition>:
          <body1>
          return X
      <body2>
      return Y

  should be written as

  ::

      if <condition>:
          <body1>
          return X
      else:
          <body2>
          return Y

  The latter clearly expresses the symmetry between, and the equality of, the
  two branches. It also reduces the possibility of introducing a defect if
  the code is modified to eliminate the ``return`` statements::

      if <condition>:
          <body1>
      <body2>

  is broken, while the modified version with ``else`` remains intact::

      if <condition>:
          <body1>
      else:
          <body2>

Static methods
--------------

* We always use ``@classmethod`` instead of ``@staticmethod``, even if the
  first argument (``cls``) of such a method is not used by its body. Whether
  ``cls`` is used is often incidental and an implementation detail. We don't
  want to repeatedly switch from ``@staticmethod`` to ``@classmethod`` and
  back if that implementation detail changes. We simply declare all methods
  that should be invoked through the class (as opposed to through an instance
  of that class) as ``@classmethod`` and call it a day.

  The same consideration goes for instance methods and ``self``: some use it,
  some don't. The ones that don't shouldn't suddenly be considered static
  methods. The distinction between instance and class methods is driven by
  higher order concerns than the one about whether a method's body currently
  references ``self`` or not.

Error messages
--------------

* We avoid the use of f-strings when composing error messages for exceptions
  and for use with ``require()`` or ``reject()``. If an error message is
  included, it should be short, descriptive of the error encountered, and
  optionally followed by the relevant value(s) involved::

      raise KeyError(key)

      raise ValueError('Unknown file type', file_type)

* Requirement errors should always have a message, since they are intended
  for clients/users::

      require(delay >= 0, 'Delay value must be non-negative')

      require(url.scheme == 'https', "Unexpected URL scheme (should be 'https')", url.scheme)

      reject(entity_id is None, 'Must supply an entity ID')

      reject(file_path.endswith('/'), 'Path must not end in slash', path)

* Assertions are usually self-explanatory.
  Error messages should only be included when they are not::

      assert not debug

      assert isinstance(x, list), type(x)

      assert x == y, ('Misreticulated splines', x, y)

Catching exceptions
-------------------

* When catching expected exceptions, especially for `EAFP`_, we minimize the
  body of the try block::

      d = make_my_dict()
      try:
          x = d['x']
      except:
          <handle missing key>
      else:
          <use x>

  This is not a mere cosmetic convention, it affects program correctness. If
  the call to ``make_my_dict`` were done inside the ``try`` block, a
  ``KeyError`` raised by it would be conflated with the one raised by
  ``d['x']``. The latter is expected, the former usually constitutes a bug.

Raising exceptions
------------------

* When raising an exception without arguments, we prefer raising the class
  instead of raising an instance constructed without arguments::

      raise RuntimeError()  # bad
      raise RuntimeError    # better

Type hints
----------

* We use type hints both to document intent and to facilitate type checking
  by an IDE or other tooling.

* When defining type hints for a function or method, we do so for all its
  parameters and the return value.

* Now that `PEP-585`_ has arrived in Python 3.9, we prefer the generic
  built-in types over the deprecated aliases from ``typing`` e.g.,
  ``dict[K,V]`` over ``Dict[K,V]``. The one exception to this rule is that
  due to a bug in PyCharm we still have to employ ``typing.Type`` instead of
  the recommended generic use of ``type``.

.. _PEP-585: https://peps.python.org/pep-0585/

..
   FIXME: Remove above exception
   https://github.com/DataBiosphere/azul/issues/4184

  In the same vein, we avoid any of the aliases in ``typing`` and prefer
  their primary definitions instead. For example, we prefer
  ``collections.abc.Set`` over ``typing.AbstractSet``. Note that the
  deprecated ``typing.Set`` (an alias of ``set``) is mutable while
  ``collections.abc.Set`` is not, so be sure to import ``Set`` from
  ``collections.abc``.

* For method/function *arguments* we prefer the least specific type possible
  e.g., ``Mapping`` over ``dict`` or ``MutableMapping`` and ``Sequence`` over
  ``List`` or ``list``. For example, we don't use ``dict`` for an argument
  unless it is actually modified by the function/method.

* Unless code should truly support multiple implementations of mutable
  mappings, we prefer ``dict[K,V]`` over ``MutableMapping[K,V]``. On the rare
  occasions that we pick the latter, we use the definition from
  ``collections.abc`` instead of the alias in ``typing``.

* For method and function return values we specify the type that we
  anticipate to be useful to the caller without being overly specific. For
  example, we prefer ``dict`` for the return type because ``Mapping`` would
  prevent the caller from modifying the returned dictionary, something that's
  typically not desirable. If we do want to prevent modification, we would
  return a ``frozendict`` or equivalent and declare the return value to be
  ``Mapping``.

* Owing to the prominence of JSON in the project we annotate variables
  containing deserialized JSON as such, using the ``JSON`` and ``MutableJSON``
  types from ``azul.typing``. Note that due to the lack of recursive types in
  PEP-484, ``JSON`` unrolls the recursion only three levels deep. This means
  that with ``x: JSON`` the expression ``x['a']['b']['c']`` would be of type
  ``JSON`` while ``x['a']['b']['c']['d']`` would be of type ``Any``.
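The following sketch illustrates the argument and return type preferences
above; the function itself is hypothetical::

    from collections.abc import Mapping

    def invert(mapping: Mapping[str, int]) -> dict[int, str]:
        # Least specific type for the argument, concrete and useful
        # type for the return value
        return {v: k for k, v in mapping.items()}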
Method and function arguments
-----------------------------

* Arguments declared as a keyword must be passed as keyword arguments at all
  call sites.

* At call sites with more than three passed arguments, if one of the
  arguments is passed as a keyword, all arguments should be passed as
  keywords, even positional ones.

* At call sites that pass a literal expression to a function or method,
  consider passing the argument as a keyword. Instead of ::

      foo(x, {})
      bar(True)

  use ::

      foo(filters={})
      bar(delete=True)

  while leaving ::

      add(1, 2)
      setDelete(True)

  as is.

* We prefer enforcing the use of keyword arguments using keyword-only
  arguments as defined in `PEP-3102`_.


.. _PEP-3102: https://www.python.org/dev/peps/pep-3102/


Abstract classes
----------------

When creating abstract base classes using the ``abc`` module, we prefer to
write::

    class Base(metaclass=ABCMeta):

instead of::

    class Base(ABC):

Testing
=======

Coverage of new code
--------------------

* All new code should be covered by unit tests.

Coverage of legacy code
-----------------------

* Legacy code for which tests were never written should be covered when it is
  modified.

Subtests
--------

* Combinatorial tests (tests that exercise a number of combinations of
  inputs) should make use of ``unittest.TestCase.subTest()`` so a single
  failing combination doesn't prevent other combinations from being
  exercised.

* Sub-tests may make sense even when there isn't a large number of
  combinations. Consider two independent tests that share an expensive
  fixture. Instead of isolating the two tests in separate ``TestCase``
  classes whose ``setUpClass`` method sets up the expensive fixture, one
  might write a single test method as follows::

      def test_a_b(self):
          self.set_fixture_up()
          try:
              with self.subTest('a'):
                  ...
              with self.subTest('b'):
                  ...
          finally:
              self.tear_fixture_down()

  This can only be done if ``a`` and ``b`` are independent. Ask yourself:
  does testing ``b`` make sense even after ``a`` fails? Can I safely reorder
  ``a`` and ``b`` without affecting the result? If the answer is "no" to
  either question, you have to remove the ``self.subTest()`` invocations.

* We don't use sub-tests for the sole purpose of marking different sections
  of test code.

Doctests
--------

* Code that doesn't require elaborate or expensive fixtures should use
  doctests if that adds clarity to the documentation or helps with expressing
  intent. Modules containing doctests must be registered in the
  ``test_doctests.py`` script.

Integration tests
-----------------

* Code that can only be tested in a real deployment should be covered by an
  integration test.


Version Control
===============

Branches
--------

* Feature branches are merged into ``develop``. If a hotfix is made to a
  deployment branch other than ``develop``, that branch is also back-ported
  and merged into ``develop`` so that the hotfix eventually propagates to all
  deployments.

* During a promotion, the branch for a lower deployment (say,
  ``integration``) is merged into the branch for the next higher deployment.

* We commit independent changes separately. If two changes could be applied
  in either order, they should occur in separate commits. Two changes A and B
  of which B depends on A may still be committed separately if B represents
  an extension of A that we might want to revert while leaving A in place.
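For example, the back-port of a hotfix from the ``prod`` branch into
``develop`` described above might look like this; the remote name ``github``
is an assumption::

    git fetch github
    git checkout develop
    git merge github/prod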
Commits
-------

* We separate semantically neutral changes from those that alter semantics by
  committing them separately, even if that would violate the previous rule.
  The most prominent example of a semantically neutral change is a
  refactoring. We also push every semantically neutral commit separately such
  that the build status checks on Github and Gitlab prove the commit's
  semantic neutrality.

* In theory, every individual commit should pass unit and integration tests.
  In practice, on PR branches with long histories not intended to be
  squashed, not every commit is built in CI. This is acceptable. [#]_

.. [#] Note: I am not a fan of this rule but the desire to maintain a linear
   history by rebasing PR branches as opposed to merging them requires this
   loophole. When pushing a rebased PR branch, we'd have to build every
   commit on that branch individually. Exploitation of this loophole can be
   avoided by creating narrowly focused PRs with only one logical change and
   few commits, ideally only one. We consider the creation of PRs with longer
   histories to be a privilege of the lead.

Split commits
-------------

* A split commit is a set of commits that represent a single logical change
  that had to be split up into separate commits for technical reasons, to
  fairly capture multiple authors' contributions, for example, or to avoid
  bloated diffs (see below). We refer to the set of commits as the *split
  commit* and the members of the set as the *part commits*.

* The title of a part commit always carries the M/N tag (see `Commit
  titles`_), where N is the number of parts while M is the ordinal of the
  part, reflecting the topological order of the parts. Splitting a change
  that "reticulates splines" into two parts yields two commits having the
  titles

  - ``[1/2] Reticulate them splines for good measure (#123)`` and
  - ``[2/2] Reticulate them splines for good measure (#123)``

  respectively.

* The parts must be consecutive, except for split commits made to retain
  authorship. The parts of a commit that was split to retain authorship can
  have other commits in between the parts if there is a pressing reason to do
  so.

* The body of the commit messages for each part should have prose to
  distinguish the parts, except for split commits made to retain authorship,
  where the distinction is obvious: each part reflects the author's
  contribution.

Bloated diffs
-------------

* We avoid bloated diffs. A bloated diff has semantic changes on top of large
  hunks of deletions that resemble additions somewhere else in the diff. We
  especially avoid insidiously bloated diffs where the semantic change occurs
  *within* one of those large hunks of deletions or additions. Bloated diffs
  distort authorship and are hard to review.

  * We avoid moving large amounts of code around via Cut & Paste unless there
    is a technical reason to do so. If there is, we commit the code change
    that moves the code as part 1/2 of a split commit, then commit the
    changes that maintain referential integrity as part 2/2. Any additional
    changes to the moved code are committed as a normal commit.

  * When splitting a file into multiple files, we identify the largest part
    and move the file so that its new name reflects the largest part. We
    commit that change as part 1/3 of a split commit to trigger Git's
    heuristic for detecting file renames. This maximizes the amount of
    authorship that is maintained. We then move the remaining parts into
    their respective files following the method in the previous bullet, using
    2/3 for moving the code and 3/3 for maintaining referential integrity.
    It's acceptable for the 1/3 commit to include any changes maintaining
    referential integrity during the file rename because those occur in
    different files and therefore don't risk tripping up the heuristic.
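A hypothetical sketch of the first part of such a split; the file names and
issue number are illustrative::

    git mv reticulation.py splines.py    # rename triggers Git's heuristic
    git commit -m '[1/3] Split reticulation module (#123)'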
Commit titles
-------------

* If a commit resolves (or contributes to the resolution of) an issue, we
  mention that issue at the end of the commit title::

      Reticulate them splines for good measure (#123)

  Note that we don't use Github resolution keywords like "fixes" or
  "resolves". Any mention of those preceding an issue reference in a title
  would automatically close the issue as soon as the commit appears on the
  default branch. This is undesirable as we want to continue to track issues
  in ZenHub's *Merged* and *Done* pipelines even after the commit appears on
  the ``develop`` branch.

* We value `expressive and concise commit message titles`_ and try to adhere
  to Github's limit of 72 characters for the length of a commit message
  title. Beyond 72 characters, Github truncates the title at 69 characters
  and adds three dots (ellipsis) which is undesirable. Titles with lots of
  wide characters like ``W`` may still wrap (as opposed to being truncated)
  but that's improbable and therefore acceptable.

* We don't use a period at the end of commit titles.

* We use `sentence case`_ for commit titles.

.. _expressive and concise commit message titles: https://chris.beams.io/posts/git-commit/

.. _sentence case: https://utica.libguides.com/c.php?g=291672&p=1943001

* When reverting a commit, be it literally or "in spirit", we refer to the
  commit ID of the reverted commit in the body of the message of the
  reverting commit. The reverting commit message title should also include a
  reference to the issue whose resolution includes the reverted commit. For
  literal reverts the commit message should be
  ``Revert "{title of reverted commit}"``. Most Git tooling does this
  automatically. For example (a literal revert, done with SmartGit)::

      f733e71 Revert "Reticulate them splines (#123)"

              This reverts commit bb7a87bed2c0a25aeecb1a542713ad6eda140f35.
      …
      bb7a87b Reticulate them splines (#123)

  Another example (a reversion in spirit)::

      f733e71 Revert reticulation of discombobulated splines (#123)

              This reverts commit bb7a87b.
      …
      bb7a87b Reticulate them splines (#123)

Commit title tags
-----------------

* Commit titles can have tags. Tags appear between square brackets at the
  very beginning of a commit message. Multiple tags are separated by a space.
  The following tags are defined:

  - ``u``: the commit requires following manual steps to upgrade a working
    copy or deployment. See `UPGRADING.rst`_ for details.

  - ``r``: the commit represents a change that requires reindexing a
    deployment after that commit is deployed there.

  - ``R``: the commit requires running ``make requirements`` after switching
    a working copy to a branch that includes that commit

  - ``M/N``: number of parts and ordinal of part in `Split commits`_

  - ``h``: the commit is a temporary hotfix. These commits should be reverted
    before the commit that provides a permanent fix is merged.

  - ``H``: the commit is a permanent hotfix.
  - ``a``: the commit modifies the Azul service API (adding functionality)

  - ``A``: the commit modifies the Azul service API in a way that is likely
    to break existing clients (changing or removing functionality)

  - ``p``: the commit only partially addresses the issues it references. A
    commit referencing more than one issue must not resolve some of those
    issues partially and others completely. In other words, the ``p`` tag
    applies uniformly to all referenced issues. Since merge commits from
    feature branches represent a combination of individual commits, this rule
    prevents the merge commit from mentioning all issues referenced by those
    individual commits. A merge commit title only references the connected
    issues, and a ``p`` tag in such a title applies to all of them uniformly.
    Merge commits for promotions, backports, GitLab updates and hotfixes
    don't mention the ``p`` tag in their titles.

* Tags must appear in a title in the order they are defined above, as in
  ``[u r R 1/2]``. This ensures that more consequential tags appear earlier.

.. _UPGRADING.rst: ./UPGRADING.rst

Issue Tracking
==============

* We use Github's built-in issue tracking and ZenHub.

* We use `sentence case`_ for issue titles.

* We don't use a period at the end of issue titles.

* For issue titles we prefer brevity over precision or accuracy. Issue titles
  are read many times and should be optimized toward quickly scanning them.
  Potential omissions, inaccuracies and ambiguities in the title can be
  added, corrected or clarified in the description.

* We make liberal use of labels. Labels denoting the subject of an issue are
  blue, those denoting the kind of issue are green, and those relating to the
  development process are yellow. Important labels are red.

* We prefer issues to be assigned to one person at a time. If the original
  assignee needs the assistance of another team member, the issue should be
  assigned to the assisting person. Once assistance has been provided, the
  ticket should be assigned back to the original assignee.

* We use ZenHub dependencies between issues to express constraints on the
  order in which those issues can be worked on. If issue ``#1`` blocks
  ``#2``, then work on ``#2`` can't begin before work on ``#1`` has
  completed. For issues that are resolved by a commit, work is considered
  complete when that commit appears on the ``develop`` branch.


Pull Requests
=============

Naming Branches
---------------

* When naming PR branches we follow the template below::

      issues/$AUTHOR/$ISSUE_NUMBER-$DESCRIPTION

  ``AUTHOR`` is the Github profile name of the PR author.

  ``ISSUE_NUMBER`` is a numeric reference to the issue that this PR
  addresses.

  ``DESCRIPTION`` is a short (no more than nine words) slug_ describing the
  branch.

.. _slug: https://en.wikipedia.org/wiki/Clean_URL#Slug


Draft PRs
---------

GitHub has the option of creating draft_ PRs. Azul PRs, with the exception of
GitLab updates, promotions, hotfixes and backports, must be created as
drafts. This prevents GitHub from immediately requesting a code review from
the lead, who is the sole code owner. Peer review occurs during the draft
state of a PR; primary review occurs when a PR is in the non-draft state,
what GitHub calls "ready for review". A work-in-progress review (WIP) can be
requested for PRs in any state as long as the request is accompanied by
specific questions.
The PR checklist contains an item for ensuring that PRs are initially created
as drafts. If you accidentally create a non-draft PR, convert the PR to a
draft and cancel the review request.

.. _draft: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests#draft-pull-requests


Rebasing
--------

* The PR author rebases the PR branch before every review.

Fixups
------

* Changes that address the outcome of a review should appear as separate
  commits. We prefix the title of those commits with ``fixup!`` and follow
  that with a space and the title of an earlier commit that the current
  commit should be squashed with. A convenient way to create those commits is
  by using the ``--fixup`` option to ``git commit``.

* Changes by a PR author that resolve merge conflicts introduced after a PR
  was approved by the lead should be committed separately as fixups. The PR
  needs to be reviewed again by the lead.

  When the rebase stops due to a conflict, the author commits all
  non-conflicting changes with ``--amend``, then commits the conflict
  resolution as ``fixup! Previous commit's title`` and finally continues the
  rebase. There should be one ``fixup!`` commit for every time the rebase
  stops.

  If the operator resolves a post-approval conflict, none of this is
  necessary. The operator should only resolve trivial conflicts, and only if
  they feel confident that the resolution does not break anything.

Squashing previous fixups
-------------------------

* Unless the PR reviewer has already done so, the PR author squashes all
  existing fixups after they get the branch back from the reviewer, and
  before addressing the review outcome with more fixups.


Assigning PRs
-------------

* The author of a PR may request reviews from anyone at any time. Once the
  author considers a PR ready to land (be merged into the base branch), the
  author rebases the branch, assigns the PR to the reviewer (the *primary
  reviewer*) and requests a review from that person. Note that assigning a PR
  and requesting a review are different actions on the Github UI.

* If a PR is assigned to someone (typically the primary reviewer), only the
  assignee may push to the PR branch. If a PR is assigned to no one, only the
  author may push to the PR branch.

Rewriting history
-----------------

* Commits in a PR should not invalidate changes from previous commits in the
  PR. Revisions that occur during development should be incorporated into
  their relevant ancestor commit. There are various techniques to achieve
  this (``git commit --amend``, ``git rebase --interactive``, ``git rebase
  --interactive --autosquash`` or ``git reset`` and committing the changes
  again) but all of these techniques involve rewriting the commit history.
  Rewriting the history of a feature branch is allowed and even encouraged
  but …

* … we only rewrite the part of the branch that has not yet been reviewed. To
  modify a commit that has already been reviewed, we create a new ``fixup!``
  commit containing the changes that address the reviewer's comments.

  Before asking for another review, we may amend or rewrite that ``fixup!``
  commit. In fact, amending a ``fixup!`` commit between reviews is preferred
  in order to avoid a series of redundant fixup commits referring to the same
  main commit. In other words, the commits added to a feature branch after a
  review should all have distinct titles.
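A minimal sketch of this workflow; the commit hash is illustrative::

    git commit --fixup=bb7a87b                     # address review comments
    git rebase --interactive --autosquash develop  # later, squash the fixups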
Drop commits
------------

* At times it may be necessary to temporarily add a commit to a PR branch
  e.g., to facilitate testing. These commits should be removed prior to
  landing the PR and their titles are prefixed with ``drop!``.

* The hunks in a ``drop!`` commit should carry an inline comment marking the
  hunk as something that will be removed. That way a reviewer can easily tell
  apart temporary hunks from permanent ones without having to consult the
  commit history.

* When squashing old fixups, ``drop!`` commits should be retained.

* Most PRs land squashed down into a single commit. A PR with more than one
  significant commit is referred to as a *multi-commit PR*. Prior to landing
  such a PR, the primary reviewer may decide to consolidate its branch.
  Alternatively, the primary reviewer may ask the PR author to do so in a
  final rejection of the PR. The final consolidation eliminates both
  ``fixup!`` and ``drop!`` commits.

Status checks
-------------

* We usually don't request a review before all status checks are green. In
  certain cases a preliminary review of a work in progress is permissible but
  the request for a preliminary review has to be qualified as such in a
  comment on the PR.

Holding branches warm
---------------------

* Some PR branches can't be reviewed or merged due to concerns external to
  the PR. Such a PR is labeled ``hold warm``, and the assignee of the PR, or
  the author, if no assignee is set, rebases the branch periodically and
  resolves any conflicts that might come up.

Merging
-------

* Without express permission from the primary reviewer, only the primary
  reviewer merges PR branches. Certain team members may possess sufficient
  privileges to push to main branches, but that does not imply that those
  team members may merge PR branches.

* The primary reviewer uses the ``sandbox`` label to indicate that a PR is
  being tested in the sandbox deployment prior to being merged. Only one open
  PR may be assigned the ``sandbox`` label at any point in time.

* When a PR branch is merged, the title of the merge commit should match the
  title of the pertinent commit in the branch, but also include the PR
  number. An example of this history looks like::

      *   8badf00d Reticulate them splines for good measure (#123, PR #124)
      |\
      | * cafebabe Reticulate them splines for good measure (#123)
      |/
      ...

  If a PR branch contains more than one commit, one of them usually
  represents the main feature or fix while other commits are preparatory
  refactorings or minor unrelated changes. The title of the merge commit in
  this case usually matches that of the main commit.

Review comments
---------------

* Github lets any user with write access resolve comments on changes in a PR.
  We aren't that permissive. When the reviewer makes a comment, either
  requesting a change or asking a question, the author addresses the comment
  by either

  - making the requested changes and reacting to the comment with a thumbs-up
    👍

  - or replying with a comment that answers the question or explains why the
    change can't be applied as requested.

  In either case, only the reviewer resolves the comment. This is to ensure
  that the reviewer can refresh their memory as to which changes they
  requested in a prior review so they can verify whether they were addressed
  satisfactorily.
PR dependencies
---------------

* We use ZenHub dependencies between PRs to define constraints on the order
  in which they can be merged into ``develop``. If PR ``#3`` blocks PR
  ``#4``, then ``#3`` must be merged before ``#4``. Issues must not block PRs
  and PRs must not block issues. The only express relation we use between
  issues and PRs is ZenHub's *Connect to issue* feature. Note that an
  explicit dependency between two issues implies a dependency between the PRs
  connected to the issues: if issue ``#1`` blocks issue ``#2`` and PR ``#3``
  is connected to ``#1`` while PR ``#4`` is connected to ``#2``, then PR
  ``#4`` must be merged after PR ``#3``.

Chained PRs
-----------

* If two PRs touch the same code, or if one PR depends on changes in another
  PR, the PRs may be chained. We say a PR ``#4`` is chained to PR ``#3``, if
  the branch for PR ``#4`` is a continuation of the branch for PR ``#3``. We
  refer to PR ``#3`` as the *base PR* and the branch for ``#3`` as the *base
  branch*.

* The base PR blocks the chained PR (see `PR dependencies`_ for details). It
  is rare for a PR to be blocked by another PR without also being chained to
  it.

* Only a draft PR may be chained to another PR. Note that this implies that
  the primary reviewer generally does not review chained PRs unless they are
  labeled ``WIP`` and the request is accompanied by specific questions.

* To chain PR ``#4`` to PR ``#3`` …

  1) Make sure PR ``#4`` is a draft PR

  2) Using ``git``, base the ``#4`` branch on the ``#3`` branch

  3) In Github, set the base branch of PR ``#4`` to the PR branch of ``#3``

  4) In ZenHub, mark PR ``#4`` as blocked by PR ``#3``.

* A PR may be chained to a PR that is chained to another PR, creating a chain
  of length 3. PR chains can be of arbitrary length. All but the first PR in
  a chain must be drafts.

  Note that in chains involving more than two PRs, the intermediate PRs carry
  both the ``chained`` and ``base`` labels.

* Rebasing a chained PR involves rebasing its branch on the base branch
  instead of ``develop``.

* Once the base PR of a chain is merged, all chained PRs need to be rebased::

      git rebase --onto origin/develop $start_commit issues/joe/1234-foo

  where ``start_commit`` is the first commit in ``issues/joe/1234-foo`` that
  wasn't also on the base PR's branch.

Hotfixes
--------

A hotfix is a change that is either pushed directly to the ``prod`` branch or
that is merged into the ``prod`` branch from a PR targeting ``prod``. The
need for hotfixes arises when defects are detected *after* a promotion, or a
previous hotfix for that matter, if such defects demand urgent remediation.

When tasked with the creation of a hotfix PR, create a new branch off the
``prod`` branch, commit the changes and request review from the lead.
Hotfixes typically do not undergo peer review. We distinguish between
permanent and temporary hotfixes. All hotfixes are backported but temporary
hotfixes will be reverted and replaced with a permanent fix via the normal
promotion. The commit title tag is

- ``h`` on a temporary hotfix,
- ``H`` on a permanent hotfix and
- ``F`` on the permanent fix for a temporary hotfix.

When authoring a hotfix, make sure that it doesn't negatively affect any
other deployment when the hotfix is backported. The hotfix must not break the
build in any deployment and cannot reduce test coverage in deployments other
than ``prod``. A conditional on ``config.deployment_stage`` can be used to
guard against such negative effects.
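A minimal sketch of such a guard; the import and the exact stage name are
assumptions::

    from azul import config

    if config.deployment_stage == 'prod':
        # Behavior required by the hotfix, confined to prod
        ...
    else:
        # Unchanged behavior for all other deployments
        ...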
If the hotfix does reduce coverage, it must be a temporary hotfix and the
corresponding permanent fix must restore coverage. Hotfixes must not alter
the index document format or otherwise require a reindex. Hotfixes must not
require upgrading deployments.

One might ask why we bother backporting temporary hotfixes at all. Without a
backport, the promotion of the corresponding permanent fix will likely cause
conflicts that an operator might find difficult to resolve. And that's if the
permanent fix overlaps with the temporary one. If it doesn't, the author of
the permanent fix can only revert the temporary hotfix if it was actually
backported to ``develop``.

In the most urgent situations, a hotfix may be pushed directly to the
``prod`` branch, without filing a PR. The above requirements apply
regardless. Only operators and system administrators can push to ``prod``.
For PR'ed hotfixes, a checklist item reminds the operator to file the
backport. Since there is no such PR for directly pushed hotfixes, the author
of such a hotfix must file a backport PR immediately after pushing the hotfix
and verifying that it works as expected.
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index e1c49dc2ee..0000000000
--- a/Dockerfile
+++ /dev/null
@@ -1,81 +0,0 @@
ARG azul_docker_registry
ARG azul_python_image
FROM --platform=${TARGETPLATFORM} ${azul_docker_registry}${azul_python_image}

ARG TARGETARCH

SHELL ["/bin/bash", "-c"]

# Increment the value of this argument to ensure that all installed OS
# packages are updated.
#
ARG azul_image_version=1
# FIXME: Remove mounting of fips_enabled
#        https://github.com/DataBiosphere/azul/issues/6675
ARG azul_proc_sys_crypto
RUN --mount=type=bind,source=fips_enabled,target=${azul_proc_sys_crypto}/fips_enabled \
    apt-get update \
    && apt-get upgrade -y \
    && apt-get -y install build-essential curl unzip

# Install helper for access to ECR with credentials from the EC2 metadata
# service
#
RUN curl -o /usr/bin/docker-credential-ecr-login \
        https://amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com/0.7.0/linux-amd64/docker-credential-ecr-login \
    && printf 'c978912da7f54eb3bccf4a3f990c91cc758e1494a8af7a60f3faf77271b565db /usr/bin/docker-credential-ecr-login\n' | sha256sum -c \
    && chmod +x /usr/bin/docker-credential-ecr-login
ARG azul_docker_registry
ENV azul_docker_registry=${azul_docker_registry}
RUN mkdir -p ${HOME}/.docker \
    && printf '{"credHelpers": {"%s": "ecr-login"}}\n' "${azul_docker_registry%/}" \
        > "${HOME}/.docker/config.json"

# Install Terraform
#
ARG azul_terraform_version
RUN mkdir terraform \
    && (set -o pipefail \
        && cd terraform \
        && curl -s -o terraform.zip \
            https://releases.hashicorp.com/terraform/${azul_terraform_version}/terraform_${azul_terraform_version}_linux_${TARGETARCH}.zip \
        && unzip terraform.zip \
        && mv terraform /usr/local/bin) \
    && rm -rf terraform

# Install Docker from the apt repository. The statically linked binaries
# don't include buildx or buildkit.
#
# https://docs.docker.com/engine/install/debian/#install-using-the-repository
#
RUN install -m 0755 -d /etc/apt/keyrings
COPY --chmod=0644 bin/keys/docker-apt-keyring.pgp /etc/apt/keyrings/docker.gpg
ARG azul_docker_version
# FIXME: Remove mounting of fips_enabled
#        https://github.com/DataBiosphere/azul/issues/6675
RUN --mount=type=bind,source=fips_enabled,target=${azul_proc_sys_crypto}/fips_enabled \
    set -o pipefail \
    && ( \
        echo "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" \
        | tee /etc/apt/sources.list.d/docker.list \
    ) \
    && apt-get update \
    && version=$(apt-cache madison docker-ce | awk '{ print $3 }' | grep -P "^5:\Q${azul_docker_version}\E" | head -1) \
    && test -n "$version" \
    && apt-get -y install docker-ce=$version docker-ce-cli=$version docker-buildx-plugin

# Prepare working directory for builds
#
RUN mkdir /build
WORKDIR /build

# Install Azul dependencies
#
ARG PIP_DISABLE_PIP_VERSION_CHECK
ENV PIP_DISABLE_PIP_VERSION_CHECK=${PIP_DISABLE_PIP_VERSION_CHECK}
COPY environment requirements*.txt common.mk Makefile ./
ARG make_target
RUN source environment \
    && make virtualenv \
    && source .venv/bin/activate \
    && make $make_target \
    && rm requirements*.txt common.mk Makefile
diff --git a/OPERATOR.rst b/OPERATOR.rst
deleted file mode 100644
index 8adf35be25..0000000000
--- a/OPERATOR.rst
+++ /dev/null
@@ -1,952 +0,0 @@
.. contents::

Getting started as operator
---------------------------

* Read the entire document

* It is **strongly recommended** that you install `SmartGit`_

.. _SmartGit: https://www.syntevo.com/smartgit/download/

* Ask the lead via Slack to:

  - add you to the ``Azul Operators`` GitHub group on DataBiosphere

  - give you Maintainer access to the GitLab ``dev``, ``anvildev``,
    ``anvilprod`` and ``prod`` instances

  - assign to you the ``Editor`` role on the Google Cloud projects
    ``platform-hca-prod`` and ``platform-hca-anvilprod``

  - remove the ``Editor`` role in those projects from the previous operator

* Ask Erich Weiler (weiler@soe.ucsc.edu) via email (cc Ben and Hannes) to:

  - grant you developer access to AWS accounts ``platform-hca-prod`` and
    ``platform-anvil-prod``

  - revoke that access from the previous operator (mention them by name)

* Confirm access to GitLab:

  #. Add your SSH key to your user account on GitLab under the
     "Settings/SSH Keys" panel

  #. Confirm SSH access to the GitLab instance::

         ssh -T git@ssh.gitlab.dev.singlecell.gi.ucsc.edu
         Welcome to GitLab, @amarjandu!

  #. Add the GitLab instances to the local working copy's ``.git/config``
     file using::

         [remote "gitlab.dcp2.dev"]
             url = git@ssh.gitlab.dev.singlecell.gi.ucsc.edu:ucsc/azul
             fetch = +refs/heads/*:refs/remotes/gitlab.dcp2.dev/*
         [remote "gitlab.dcp2.prod"]
             url = git@ssh.gitlab.azul.data.humancellatlas.org:ucsc/azul.git
             fetch = +refs/heads/*:refs/remotes/gitlab.dcp2.prod/*
         [remote "gitlab.anvil.dev"]
             url = git@ssh.gitlab.anvil.gi.ucsc.edu:ucsc/azul.git
             fetch = +refs/heads/*:refs/remotes/gitlab.anvil.dev/*

  #. Confirm access to fetch branches::

         git fetch -v gitlab.dcp2.dev
         From ssh.gitlab.dev.singlecell.gi.ucsc.edu:ucsc/azul
          = [up to date]  develop                    -> gitlab.dcp2.dev/develop
          = [up to date]  issues/amar/2653-es-2-slow -> gitlab.dcp2.dev/issues/amar/2653-es-2-slow

* Standardize remote repository names. If the name of the remote repository
  on GitHub is set to ``origin``, rename the remote repository to ``github``.
  Run::

      git remote rename origin github
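  To verify the result, list the remotes::

      git remote -v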
Operator jobs
-------------

First order of business: add a calendar event for the next scheduled operator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

As soon as your shift begins, and before performing any other actions as an
operator, create the following Google Calendar event in the `Team Boardwalk
calendar`_.

Create an all-day calendar event for the two weeks after your current stint,
using the title ``Azul Operator: <name>``, where ``<name>`` is the name of
the operator who will be serving next.

If you are aware of any schedule irregularities, such as one operator
performing more than one consecutive stint, create events for those as well.

.. _`Team Boardwalk calendar`: https://calendar.google.com/calendar/u/0/r?cid=dWNzYy5lZHVfMDRuZ3J1NXQzNDB0aWd0cW5qYWQ5Nm5jOWtAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ

Check weekly for Amazon OpenSearch Service updates
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The operator checks weekly for notifications about service software updates
to Amazon OpenSearch Service domains for all Azul deployments. Note that
service software updates are distinct from updates to the upstream version of
ElasticSearch (or Amazon's OpenSearch fork) in use on an ES domain. While the
latter are discretionary and applied via a change to Terraform configuration,
some of the former are mandatory.

Unless we intervene, AWS will automatically force the installation of any
update about which we receive a ``High`` severity notification, typically two
weeks after the notification was sent. Read `Amazon notification severities`_
for more information. The operator must prevent the automatic installation of
such updates. It would be disastrous if an update were to be applied during a
reindex in ``prod``. Instead, the operator must apply the update manually as
part of an operator ticket in GitHub, as soon as possible, and well before
Amazon would apply it automatically.

To check for, and apply, if necessary, any pending service software updates,
the operator performs the following steps weekly.

1. In the *Amazon OpenSearch Service Console* select the *Notifications* pane
   and identify notifications with subject ``Service Software Update``.

2. Record the severity, date and the ES domain name of these notifications.
   Collect this information for all ES domains in both the ``prod`` and
   ``dev`` AWS accounts. If there are no notifications, you are done.

3. Open a new ticket in GitHub and title it ``Apply Amazon OpenSearch (ES)
   Software Update (before {date})``. Include ``(before {date})`` in the
   title if any notification is of ``High`` severity, representing a forced
   update. Replace ``{date}`` with the anticipated date of the forced
   installation. If there already is an open ticket for pending updates,
   reuse that ticket and adjust it accordingly.

4. If the title contains a date, pin the ticket as *High Priority* in ZenHub.

5. The description of the ticket should include a checklist item for each ES
   domain recorded in step 2. The checklist should include items for
   notifying the team members about any disruptions to their personal
   deployments, say, when the ``sandbox`` domain is being updated.
   Use this template for the checklist::

       - [ ] Update `azul-index-dev`
       - [ ] Update `azul-index-anvildev`
       - [ ] Update `azul-index-anvilprod`
       - [ ] Confirm with Azul devs that their personal deployments are idle
       - [ ] Update `azul-index-sandbox`
       - [ ] Update `azul-index-anvilbox`
       - [ ] Update `azul-index-hammerbox`
       - [ ] Update `azul-index-prod`
       - [ ] Confirm snapshots are disabled on all domains
         - `aws opensearch describe-domains --domain-names <domain> | jq '.DomainStatusList[].SnapshotOptions'`
         - Value of `AutomatedSnapshotStartHour` should be `-1`

   Note that, somewhat counterintuitively, main deployments are updated
   before their respective ``sandbox``. If, during step 3, updates or domains
   were added to an existing ticket, the entire process may have to be
   restarted and certain checklist items may need to be reset.

6. To update an ES domain, select it in the Amazon OpenSearch Service
   console. Under *General information*, the *Service software version*
   should have an *Update available* hyperlink. Click on it and follow the
   subsequent instructions.

7. Once the upgrade process is completed for the ``dev`` or ``prod`` ES
   domain, perform a smoke test using the respective Data Browser instance.

.. _`Amazon notification severities`: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-notifications.html#managedomains-notifications-severities

Review counts
^^^^^^^^^^^^^

When verifying the accuracy of the ``review count`` label, search for the
string ``hannes-ucsc requested`` on the PR page. Make sure to check for
comments that indicate if a review count was not bumped.

Testing a PR in the ``sandbox``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The operator sets the ``sandbox`` label on a PR before pushing the PR branch
to GitLab. If the resulting sandbox build passes, the PR is merged and the
label stays on. If the build fails, the label is removed. Only one un-merged
PR should have the label.

If the tests fail while running a sandbox PR, an operator should do minor
failure triage.

Triaging ``sandbox`` failures
"""""""""""""""""""""""""""""

* If the PR fails because of out-of-date requirements on a PR with the
  ``[R]`` tag, the operator should rerun ``make requirements_update``,
  `committing the changes separately`_ with a title like
  ``[R] Update requirements``. It is not necessary to re-request a review
  after doing so.

* For integration test failures, check if the PR has the ``r`` (reindex) tag.
  If so, running an early reindex may resolve the failure.

* Determine if the failure could have been caused by the changes in the PR.
  If so, there is no need to open up a new ticket. Bounce the PR back to the
  "In progress" column and notify the author of the failure. Ideally provide
  a link.

* All other build failures need to be tracked in tickets. If there is an
  existing ticket, comment on it with a link to the failed job and move the
  ticket to Triage. If there is no existing ticket resembling the failed
  build, create a new one, with a link to the failed build, a transcript of
  any relevant error messages and stack traces from the build output, and any
  relevant log entries from CloudWatch.

Triaging GitLab build failures on ``dev`` and ``prod``
""""""""""""""""""""""""""""""""""""""""""""""""""""""

If a GitLab build fails on a main deployment, the operator must evaluate the
impact of that failure. This evaluation should include visiting the Data
Browser to verify it isn't broken.
-
-To restore the deployment to a known working state, the operator should rerun
-the deploy job of the previous passing pipeline for that deployment. This can
-be done without pushing anything and only takes a couple of minutes. The
-branch for that deployment must then be reverted to the previously passing
-commit.
-
-.. _committing the changes separately: https://github.com/DataBiosphere/azul/issues/2899#issuecomment-804508017
-
-Reindexing
-^^^^^^^^^^
-
-During reindexing, watch the ES domain for unassigned shards, using the AWS
-console. The ``azul-prod`` CloudWatch dashboard has a graph for the shard
-count. It is OK to have unassigned shards for a while, but if the same
-unassigned shards persist for over an hour, they are probably permanently
-unassigned. Follow the procedure outlined in `this AWS support article`_,
-using either Kibana or Cerebro. Cerebro has a dedicated form field for the
-index setting referenced in that article. In the past, unassigned shards have
-been caused by AWS attempting to make snapshots of the indices that are being
-written to under high load during reindexing. Make sure that
-``GET _cat/snapshots/cs-automated`` returns nothing. Make sure that the
-*Start Hour* under *Snapshots* on the *Cluster configuration* tab of the ES
-domain page in the AWS console is shown as ``0-1:00 UTC``. If either of these
-checks fails, file a support ticket with AWS urgently requesting snapshots to
-be disabled.
-
-.. _this AWS support article: https://aws.amazon.com/premiumsupport/knowledge-center/opensearch-in-memory-shard-lock/
-
-The operator must check the status of the queues for failures after every
-reindex. Use ``python scripts/manage_queues.py`` to identify any failed
-messages. If failed messages are found, use the same script to
-
-- dump the failed notifications to JSON file(s), using ``--delete`` to
-  simultaneously clear the ``notifications_fail`` queue
-
-- force-feed the failed notifications back into the ``notifications_retry``
-  queue. We feed directly into the retry queue, not the primary queue, to
-  save time if/when the messages fail again.
-
-This may cause the previously failed messages to succeed. Repeat this
-procedure until the set of failed notifications stabilizes, i.e., the
-``notifications_fail`` queue is empty or no previously failed notifications
-succeeded.
-
-Next, repeat the dump/delete/force-feed steps with the failed tallies,
-feeding them into the ``tallies_retry`` queue (again, **NOT** the primary
-queue) until the set of failed tallies stabilizes.
-
-If at this point the fail queues are not empty, all remaining failures must
-be tracked in tickets:
-
-- document the failures within the PR that added the changes
-
-- triage against expected failures from existing issues
-
-- create new issues for unexpected failures
-
-- link each failure you document to its respective issue
-
-- ping people on the Slack channel ``#dcp2`` about those issues, and finally
-
-- clear the fail queues so they are empty for the next reindexing
-
-For an example of how to document failures within a PR, `click here`_.
-
-.. _click here: https://github.com/DataBiosphere/azul/pull/3050#issuecomment-840033931
-
-Reindexing a specific catalog or sources in GitLab
-""""""""""""""""""""""""""""""""""""""""""""""""""
-
-From the GitLab web app, select the ``reindex`` or ``early_reindex`` job of
-the pipeline for which a specific catalog needs to be reindexed.
-From there, you should see an option for defining the key and value of
-additional variables with which to parameterize the job.
-
-To specify a catalog to be reindexed, set ``Key`` to ``azul_current_catalog``
-and ``Value`` to the name of the catalog, for example, ``dcp3``. To specify
-the sources to be reindexed, set ``Key`` to ``azul_current_sources`` and
-``Value`` to a space-separated list of source globs, e.g.
-``*:hca_dev_* *:lungmap_dev_*``. Double-check the values you just entered.
-Start the ``reindex`` job by clicking on ``Run job``. Wait until the job has
-completed.
-
-Repeat these steps to reindex any additional catalogs.
-
-
-Updating the AMI for GitLab instances
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-As part of the upgrades issue, operators must check for updates to the AMI
-for the root volume of the EC2 instance running GitLab. We use a variant of
-Amazon's Linux 2 AMI that is hardened to the requirements of the CIS Amazon
-Linux 2 benchmark. The license to use the AMI for an EC2 instance is sold by
-CIS as a subscription on the AWS Marketplace:
-
-https://aws.amazon.com/marketplace/pp/prodview-wv574yqgjv6jg
-
-The license costs $0.024 per instance/hour. Every AWS account must subscribe
-separately.
-
-There are ways to dynamically determine the latest AMI released by CIS under
-the subscription but, in the spirit of reproducible builds, we would rather
-pin the AMI ID and adopt updates at our own discretion, to avoid unexpected
-failures.
-
-Note that the AMI versioning scheme (e.g., ``v01``, ``v11``) indicates the
-month of release and is not a monotonically increasing value.
-
-To obtain the latest compatible AMI ID, select the desired ``….gitlab``
-component, say, via ``_select dev.gitlab``, and run
-
-::
-
-    aws ec2 describe-images \
-        --owners aws-marketplace \
-        --filters="Name=name,Values=*abcfcbaf-134e-4639-a7b4-fd285b9fcf0a*" \
-        | jq -r '.Images[] | .CreationDate+"\t"+.ImageId+"\t"+.Name' \
-        | sort \
-        | tail -1
-
-This prints the date, ID and name of the latest CIS-hardened AMI. Update the
-``ami_id`` variable in ``terraform/gitlab/gitlab.tf.json.template.py`` to
-refer to that AMI ID. Update the image name in the comment right above the
-variable so that we know which semantic product version the AMI represents.
-AMIs are specific to a region, so the variable holds a dictionary with one
-entry per region. If there are ``….gitlab`` components in more than one AWS
-region (which is uncommon), you need to select at least one ``….gitlab``
-component in each of these regions, rerun the command above for each such
-component, and add or update the ``ami_id`` entry for the respective region.
-Instead of selecting a ``….gitlab`` component, you can also just specify the
-region of the component using the ``--region`` option of
-``aws ec2 describe-images``.
-
-Upgrading GitLab & ClamAV
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Operators check for updates to the Docker images for GitLab and ClamAV as
-part of the biweekly upgrade process, and whenever a GitLab security release
-requires it. An email notification is sent to ``azul-group@ucsc.edu`` when a
-GitLab security release is available. Discuss with the lead the **Table of
-Fixes** referenced in the release blog post to determine the urgency of the
-update. When updating the GitLab version, either as part of the regular
-update or when otherwise necessary, check if there are applicable updates to
-the `GitLab runner image`_ as well. Use the latest runner image whose major
-and minor version match that of the GitLab image.
-When upgrading across multiple GitLab versions, follow the prescribed GitLab
-`upgrade path`_. You will likely only be able to perform one step on that
-path per biweekly upgrade PR.
-
-.. _upgrade path: https://docs.gitlab.com/ee/update/index.html#upgrade-paths
-
-Before upgrading the GitLab version, create a backup of the GitLab volume.
-See `Backup GitLab volumes`_ for help.
-
-Upgrade direct Python dependencies
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-In PyCharm, use the `Package tool window`_ to view the most recent versions
-of the project's direct Python dependencies. This feature may only work
-properly after running ``make envhook`` and correctly configuring the Python
-interpreter for the project (at least once before).
-
-Proceed by identifying the packages that are candidates for upgrades. Check
-the dependencies listed in ``requirements.txt`` and ``requirements.dev.txt``
-against the Package tool window, which indicates whether a newer version of
-each dependency is available. When updating:
-
- * Update to the latest mature release (a release with a high patch number,
-   or one where the most recent patch release is at least a couple of months
-   old) and go backward if problems occur.
-
- * Document each of these problems with a dedicated FIXME, with its
-   respective ticket & reference, when non-trivial code base changes are
-   necessary due to a package version upgrade.
-
- * Reference the GitHub link in a comment beside the conflicting package.
-
- * If updating a package causes only a trivial change or a dismissable
-   warning (e.g., a deprecation warning), commit that update on its own, so
-   that the dependency forcing the change, and its resolution, are easy to
-   identify.
-
-Note that a concise way to display all available versions of a given package
-is to pretend to install a non-existent version from a terminal console via
-pip. For example, to see all available versions of ``flake8``, one may run
-``pip install flake8==9.9.9``; the error output will list all available
-versions of the dependency.
-
-As always, each of the committed changes should be tested and should
-independently pass all feature branch checks in GitHub, etc. Perform the
-following to smoke-test basic operations and functions:
-
- #. Recreate the project's virtualenv from scratch, run the ``requirements``
-    target, run the ``envhook`` target and end with ``requirements_update``.
-
- #. Run the ``test`` and ``deploy`` targets in a personal deployment (or via
-    the sandbox) and then run the integration test.
-
-.. _Package tool window: https://www.jetbrains.com/guide/python/tutorials/getting-started-pycharm/installing-and-managing-python-packages/
-
-Increase GitLab data volume size
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When the CloudWatch alarm for high disk usage on the GitLab data volume goes
-off, you must attach a new, larger volume to the instance. Run the command
-below to create both a snapshot of the current data volume and a new data
-volume of the specified size, restored from that snapshot.
-
-Discuss the desired new size with the system administrator before running the
-command::
-
-    python scripts/create_gitlab_snapshot.py --new-size [new_size]
-
-When this command finishes, it will leave the instance in a stopped state.
-Take note of the command logged by the script. You'll use it to delete the
-old data volume after confirming that GitLab is up and running with the new
-volume attached.
-
-Next, deploy the ``gitlab`` TF component in order to attach the new data
-volume.
-The only resource with changes in the resulting plan should be
-``aws_instance.gitlab``. Once the ``gitlab`` TF component has been deployed,
-start the GitLab instance again by running::
-
-    python scripts/create_gitlab_snapshot.py --start-only
-
-Finally, SSH into the instance to complete the setup of the new data volume.
-Use the ``df`` command to confirm the size and mount point of the device, and
-``resize2fs`` to grow the mounted file system so that its size matches that
-of the volume. Run::
-
-    df # Verify device /dev/nvme1n1 is mounted on /mnt/gitlab, note available size
-    sudo resize2fs /dev/nvme1n1
-    df # Verify the new available size is larger
-
-The output of the last ``df`` command confirms the success of these
-operations: a larger available size compared to the first run indicates that
-the resizing operation was successful. You can now delete the old data volume
-by running the deletion command you noted earlier.
-
-Backup GitLab volumes
-^^^^^^^^^^^^^^^^^^^^^
-
-Use the ``create_gitlab_snapshot.py`` script to back up the EBS data volume
-attached to each of our GitLab instances. The script will stop the instance,
-create a snapshot of the GitLab EBS volume, tag the snapshot and finally
-restart the instance::
-
-    python scripts/create_gitlab_snapshot.py
-
-For GitLab or ClamAV updates, use the ``--no-restart`` flag in order to leave
-the instance stopped after the snapshot has been created. There is no point
-in starting the instance only to have the update terminate it again.
-
-Updating software packages on GitLab instances
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Once a week, operators must update all Linux packages installed on the root
-volume of each GitLab instance. SSH access to the instances is necessary to
-perform these instructions, but on production instances this access is
-unavailable, even to operators. In these cases, the operator must request the
-help of the system administrator via Slack to perform these steps.
-
-SSH into the instance and run ``sudo yum update`` followed by
-``sudo reboot``. Wait for the GitLab web application to become available
-again and perform a ``git fetch`` from one of the Git repositories hosted on
-that instance.
-
-Updating the Swagger UI
-^^^^^^^^^^^^^^^^^^^^^^^
-
-Operators should regularly check for available updates to the Swagger UI. The
-current version used by Azul is hardcoded in ``scripts/update_swagger.py``.
-The upstream source is located here:
-
-https://github.com/swagger-api/swagger-ui/tree/master/dist
-
-Scheduled upgrade PRs should only include minor and hotfix updates to the
-Swagger UI. If a new major version is available, open a new issue instead. To
-perform the update, edit the ``tag`` variable in the ``update_swagger``
-script and run it. If there are nontrivial changes to the
-``swagger-initializer.js`` or ``oauth2-redirect.html`` files, cancel the
-update and open a new issue instead. Otherwise, forward any changes to those
-two files to their respective mustache template files, and commit the changes
-to the script and all modified files in the ``swagger/`` directory. The
-commit message must include the new tag, as well as a link to the upstream
-source in the commit body, e.g.::
-
-    Update Swagger UI to v (#issue-number)
-
-    https://github.com/swagger-api/swagger-ui/tree/v/dist
-
-
-Export AWS Inspector findings
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-#. ``_select anvilprod``
-
-#. Run ``python scripts/export_inspector_findings.py`` to generate a CSV file
-
-#. Open the `Anvilprod Inspector Findings spreadsheet`_
-
-#. Select ``File`` > ``Import`` to import the generated CSV, and on the
-   ``Import file`` dialog use these options:
-
-   - Import location: Insert new sheet(s)
-
-   - Convert text to numbers, dates, and formulas: Checked
-
-#. Rename the new tab using ``YYYY-MM-DD`` with the date of the upgrade
-   issue, and move it to the front of the stack
-
-#. Apply visual formatting (e.g. column width) to the sheet, using a previous
-   sheet as a guide
-
-.. _Anvilprod Inspector Findings spreadsheet: https://docs.google.com/spreadsheets/d/1RWF7g5wRKWPGovLw4jpJGX_XMi8aWLXLOvvE5rxqgH8/edit#gid=1657352747
-
-Adding snapshots to ``dev``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When adding a new snapshot to ``dev`` or ``anvildev``, the operator should
-also add the snapshot to ``sandbox`` or ``anvilbox``, respectively.
-
-The ``post_deploy_tdr.py`` script will fail if the computed common prefix
-matches an unacceptable number of subgraphs. If the script reports that the
-common prefix is too long, truncate it by one character. If it's too short,
-append one arbitrary hexadecimal character. Pass the updated prefix as a
-keyword argument to the ``mksrc`` function for the affected source(s),
-including a partition prefix length of 1. Then refresh the environment and
-re-attempt the deployment.
-
-Adding snapshots to ``prod``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-We decide on a case-by-case basis whether PRs which update or add new
-snapshots to ``prod`` should be filed against the ``prod`` branch instead of
-``develop``. When deciding whether to perform snapshot changes directly on
-``prod`` or include them in a routine promotion, the system administrator
-considers the scope of the changes to be promoted. It would be a mistake to
-promote large changes in combination with snapshots because that would make
-it difficult to diagnose whether indexing failures are caused by the changes
-or the snapshots.
-
-Removing catalogs from ``prod`` and setting a new default
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-PRs which remove catalogs or set a new default for ``prod`` should be filed
-against the ``prod`` branch instead of ``develop``.
-
-When setting a new default catalog in ``prod``, the operator shall also
-delete the old default catalog, unless the ticket explicitly specifies not to
-delete the old catalog.
-
-Add a checklist item at the end of the PR checklist to file a back-merge PR
-from ``prod`` to ``develop``.
-
-Add another checklist item instructing the operator to manually delete the
-old catalog.
-
-Promoting to ``prod``
-^^^^^^^^^^^^^^^^^^^^^
-
-Promotions to ``prod`` should happen weekly, on Wednesdays at 3pm. We promote
-early in the week in order to have time to triage any potential issues during
-reindexing, and at 3pm to leave a cushion of time in case anything goes
-wrong.
-
-To do a promotion:
-
-#. Decide together with the lead up to which commit to promote. This commit
-   will be the HEAD of the promotion branch.
-
-#. Create a new GitHub issue with the title ``Promotion yyyy-mm-dd``
-
-#. Make sure your ``prod`` branch is up to date with the remote.
-
-#. Create a branch at the commit chosen above and name it correctly. See the
-   `promotion PR template`_ for what the correct branch name is.
-
-#. File a PR on GitHub from the new promotion branch and connect it to the
-   issue. The PR must target ``prod``. Use the `promotion PR template`_.
-
-#. Request a review from the primary reviewer.
-
-#. Once the PR is approved, announce in the `#team-boardwalk Slack channel`_
-   that you plan to promote to ``prod``.
-
-#. Search for and follow any special ``[u]`` upgrading instructions that were
-   added.
-
-#. When merging, follow the checklist, making sure to carry over any commit
-   title tags (``[u r R]``, for example) into the default merge commit title,
-   e.g., ``[u r R] Merge branch 'promotions/2022-02-22' into prod``. Don't
-   rebase the promotion branch and don't push the promotion branch to GitLab.
-   Merge the promotion branch into ``prod`` and push the merge commit on the
-   ``prod`` branch first to GitHub and then to the ``prod`` instance of
-   GitLab.
-
-.. _promotion PR template: /.github/PULL_REQUEST_TEMPLATE/promotion.md
-
-Backporting from ``prod`` to ``develop``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-There should only ever be one open backport PR against ``develop``. If more
-commits accumulate on ``prod``, waiting to be backported, close the existing
-backport PR first. The new PR will include the changes from the old one.
-
-#. Make a branch from ``prod`` at the most recent commit being backported.
-   Name the branch following this pattern::
-
-       backports/<7-digit SHA1 of most recent backported commit>
-
-#. Open a PR from your branch, targeting ``develop``. The PR title should be
-
-   ::
-
-       Backport: <7-digit SHA1 of most recent backported commit> (#, PR #)
-
-   Repeat this pattern for each of the older backported commits, if there are
-   any. An example commit title would be
-
-   ::
-
-       Backport 32c55d7 (#3383, PR #3384) and d574f91 (#3327, PR #3328)
-
-   Be sure to use the PR template for backports by appending
-   ``&template=backport.md`` to the URL in your browser's address bar.
-
-#. Assign the PR to, and request review from, the primary reviewer. The PR
-   should only be assigned to one person at a time, either the reviewer or
-   the operator.
-
-#. Perform the merge. The commit title should match the PR title::
-
-       git merge prod --no-ff
-
-#. Push the merge commit to ``develop``. It is normal for the branch history
-   to look very ugly following the merge.
-
-.. _#team-boardwalk Slack channel: https://ucsc-gi.slack.com/archives/C705Y6G9Z
-
-
-Deploying the Data Browser
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The Data Browser is deployed in two steps. The first step is building the
-``ucsc/data-browser`` project on GitLab. This is initiated by pushing a
-branch whose name matches ``ucsc/*/*`` to one of our GitLab instances. The
-resulting pipeline produces a tarball stored in the package registry on that
-GitLab instance. The second step is running the ``deploy_browser`` job of the
-``ucsc/azul`` project pipeline on that same instance. This job creates or
-updates the necessary cloud infrastructure (CloudFront, S3, ACM, Route 53),
-downloads the tarball from the package registry and unpacks it to the S3
-bucket backing the Data Browser's CloudFront distribution.
-
-Typically, CC requests the deployment of a Data Browser instance on Slack,
-specifying the commit (tag or SHA1) they wish to be deployed. After the
-system administrator approves that request, the operator pushes the specified
-tag (if a tag was specified) to the GitLab instance for the Azul
-``{deployment}`` that backs the Data Browser instance to be deployed. Then
-the specified tag (or commit, if no tag was specified) is merged into one of
-the ``ucsc/{atlas}/{deployment}`` branches.
-That branch is then pushed to the ``DataBiosphere/data-browser`` project on
-GitHub, and to the ``ucsc/data-browser`` project on GitLab (same instance as
-above). For the merge commit title, SmartGit's default can be used, as long
-as the title reflects the commit (tag or SHA1) specified by CC.
-
-The ``{atlas}`` placeholder can be ``hca``, ``anvil`` or ``lungmap``. Not all
-combinations of ``{atlas}`` and ``{deployment}`` are valid. Valid
-combinations are, for example, ``ucsc/anvil/anvildev``,
-``ucsc/anvil/anvilprod``, ``ucsc/hca/dev``, ``ucsc/hca/prod``,
-``ucsc/lungmap/dev`` or ``ucsc/lungmap/prod``. The ``ucsc/data-browser``
-pipeline on GitLab blindly builds any branch, but Azul's ``deploy_browser``
-job is configured to only use the tarball from exactly one branch (see
-``deployments/*.browser/environment.py``) and it will always use the tarball
-from the most recent pipeline on that branch.
-
-
-Running a ZAP vulnerability scan
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Setting up ZAP
-""""""""""""""
-
-Follow these steps to set up the ZAP application for scanning the HCA and
-AnVIL systems. This setup only needs to be completed once; for future scans
-you can simply jump to `Launching ZAP`_.
-
-#. Download ZAP from https://www.zaproxy.org/ .
-
-#. Install & open ZAP.
-
-#. From the popup, select the *No, I do not want to persist this session at
-   this moment in time* option and click *Start*.
-
-#. Confirm that ZAP is configured to run in standard mode by first selecting
-   *Edit* from the app menu bar, then *ZAP Mode*, then selecting *Standard
-   Mode*.
-
-#. To prevent ZAP scans from exceeding Azul's request rate limit and being
-   temporarily blocked by the system, you will need to configure the maximum
-   rate of requests that ZAP will send out. From the app menu bar, select
-   *Tools*, then *Options*, then *Network*, then *Rate Limit*. Add and enable
-   a three-requests-per-second rule for the match string ``anvilprod.org``,
-   and another such rule for the match string ``humancellatlas.org``.
-
-#. With the *Options* window open, select *Check for Updates* from the list
-   of options. Confirm that both *Check for updates on startup* and *Check
-   for updates to the add-ons you have installed* are enabled.
-
-#. Click *OK* to close the *Options* window, and then proceed to exit the ZAP
-   application.
-
-Launching ZAP
-"""""""""""""
-
-All scans need to be run with authenticated requests. The process for running
-an authenticated scan is to first obtain an Azul authentication token, and
-then launch the ZAP application with the token set as an environment
-variable. ZAP will then use your token to add an authentication header to all
-requests made during the scan. See the `ZAP documentation`_ for more
-information.
-
-.. _`ZAP documentation`: https://www.zaproxy.org/docs/getting-further/authentication/handling-auth-yourself/
-
-Follow these steps to get an authorization token from Azul:
-
-#. Open the Swagger UI for the appropriate (HCA or AnVIL) Azul service.
-
-#. Click *Authorize*, select all scopes, click *Authorize*, then *Close* to
-   complete the authorization.
-
-#. Using the Swagger UI, execute an endpoint such as ``/index/catalogs``.
-
-#. Locate the example ``curl`` command that Swagger produces for you, and
-   copy the token value from the ``Authorization`` header (e.g.
-   ``Bearer ya29.a0…``).
-
-Using the token copied above, you can now set an environment variable and
-launch ZAP from the command line. Open a terminal window, and run:
-
-#. ``export ZAP_AUTH_HEADER_VALUE=""``
-
-#. ``/Applications/ZAP.app/Contents/MacOS/ZAP.sh``
-
-After the ZAP application has opened, follow the steps below to `create a new
-session`_ and run a scan. After your scan has completed and you have
-generated a report, close the ZAP application, and then repeat the steps
-above to start each additional scan with a fresh authentication token.
-
-.. _`create a new session`: #zap-sessions
-
-ZAP Sessions
-""""""""""""
-
-With the ZAP application open, you must start a new session prior to running
-a new scan. Failure to do so can pollute the scan results with the findings
-from the previous scan. A new session is created each time you launch ZAP;
-alternatively, to manually open a new session, select *File* from the app
-menu bar, and then *New Session*.
-
-If you are prompted with options to persist the ZAP session, select the *No,
-I do not want to persist this session at this moment in time* option and
-click *Start*.
-
-You may now continue with either a `Data Portal / Browser scan`_ or an `Azul
-Indexer / Service API scan`_.
-
-.. _`Portal / Browser scan`: #running-a-portal-browser-scan
-.. _`Azul Indexer / Service API scan`: #running-an-azul-indexer-service-api-scan
-
-Running a Data Portal / Browser scan
-""""""""""""""""""""""""""""""""""""
-
-#. Using the *Quick Start* tab, click *Automated Scan*.
-
-#. Enter the desired URL (e.g. https://anvilproject.org/) in the *URL to
-   attack* field.
-
-#. Enable the *Use traditional spider* option.
-
-#. Select *If modern* from the *Use ajax spider* option, and *Firefox
-   Headless* from the *With* option.
-
-#. Click *Attack* to begin the scan.
-
-#. Wait until all the scans (Ajax spider, passive scans, etc.) have
-   completed. In practice, this can take up to four hours depending on the
-   target URL. Note that you will not receive a notification when the scans
-   have completed. Instead, take note of the *Current Status* values in the
-   ZAP window footer. Proceed when all scan counts show ``0``.
-
-#. Continue with the steps below to `generate a report`_.
-
-.. _`generate a report`: #generating-a-zap-report
-
-Running an Azul Indexer / Service API scan
-""""""""""""""""""""""""""""""""""""""""""
-
-In order to run an API scan, you must first import the OpenAPI definition:
-
-#. From the app menu bar, select *Import*, then *Import an OpenAPI
-   Definition*.
-
-#. Enter the URL of the OpenAPI definition (e.g.
-   https://service.explore.anvilproject.org/openapi.json) in the *URL* field.
-
-#. Click *Import* to start the import.
-
-After the import of the OpenAPI definition completes, you can then proceed to
-run an automated scan using the same steps as for a `Data Portal / Browser
-scan`_. For the *URL to attack*, enter the base URL of the Azul indexer or
-service with no additional path components (e.g.
-https://service.explore.anvilproject.org/).
-
-.. _`Data Portal / Browser scan`: #running-a-data-portal-browser-scan
-
-Generating a ZAP Report
-"""""""""""""""""""""""
-
-After a scan has completed, use the following steps to save a PDF export of
-the scan results.
-
-#. From the app menu bar, select *Report*, then *Generate Report*.
-
-#. Navigate to the *Template* tab of the *Generate Report* window, and select
-   *Traditional PDF Report* from the *Template* option.
-
-#. Navigate to the *Scope* tab, and enter a value such as "AnVIL Data Portal"
-   in the *Report Title* field.
-
-#. The *Report Name* field specifies the name of the file to be created.
-   Enter a value such as "2025-01-01-anvil-data-portal.pdf" in this field.
-
-#. Click *Generate Report* to complete the export.
-
-
-Troubleshooting
----------------
-
-Credentials expire in the middle of a long-running operation
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-In some instances, deploying a Terraform component can take a long time.
-While ``_login`` now makes sure that there are four hours left on the current
-credentials, it can't do that if you don't call it before such an operation.
-Note that ``_select`` also calls ``_login``. The following is a list of
-operations which you should expect to take an hour or longer:
-
-- the first time deploying any component
-
-- deploying a plan that creates or replaces an Elasticsearch domain
-
-- deploying a plan that involves ACM certificates
-
-- deploying a ``shared`` component after modifying ``azul_docker_images`` in
-  ``environment.py``, especially on a slow uplink
-
-To make things worse, if the credentials expire while Terraform is updating
-resources, it will not be able to write the partially updated state back to
-the shared bucket. A subsequent retry will therefore likely report conflicts
-due to already existing resources. The remedy is to import those existing
-resources into the Terraform state using ``terraform import``.
-
-Push errors
-^^^^^^^^^^^
-
-If an error occurs when pushing to the develop branch, ensure that the branch
-you would like to merge in is rebased on develop and has completed its CI
-pipeline. If there is only one approval (from the primary reviewer), an
-operator may approve a PR that does not belong to them. If the PR has no
-approvals (for example, because it belongs to the primary reviewer), the
-operator may approve the PR and seek out another team member to perform the
-second needed review. When making such a pro-forma review, indicate this
-within the review summary (`example`_).
-
-.. _example: https://github.com/DataBiosphere/azul/pull/2646#pullrequestreview-572818767
-
-PR Closed automatically and can't be reopened
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-This can happen when a PR is chained on another PR and the base PR is merged
-and its branch deleted. To solve this, first restore the base PR branch. The
-operator should have a local copy of the branch that they can push. If not,
-then the PR's original author should.
-
-Once the base branch is restored, the ``Reopen PR`` button should again be
-clickable on the chained PR.
-
-Integration test times out
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-This can happen on the rare occasion that the IT's random selection of
-bundles happens to pick predominantly large bundles that need to be
-partitioned before they can be indexed. This process can divide bundles into
-partitions, and divide partitions into sub-partitions, since technically
-bundles are partitions with an empty prefix.
-
-In the AWS console, run the CloudWatch Insights query below with the indexer
-log groups selected to see how many divisions have occurred::
-
-    fields @timestamp, @log, @message
-    | filter @message like 'Dividing partition'
-    | parse 'Dividing partition * of bundle *, version *, with * entities into * sub-partitions.'
-      as partition, bundle, version, entities, subpartitions
-    | display partition, bundle, version, entities, subpartitions
-    | stats count(@requestId) as total_count by bundle, partition
-    | sort total_count desc
-    | sort @timestamp desc
-    | limit 1000
-
-Note that when bundles are being partitioned, errors about exceeded rate and
-quota limits are to be expected::
-
-    [ERROR] TransportError: TransportError(429, '429 Too Many Requests /azul_v2_prod_dcp17-it_cell_suspensions/_search')
-
-    [ERROR] Forbidden: 403 GET https://bigquery.googleapis.com/bigquery/v2/projects/...: Quota exceeded: Your project:XXXXXXXXXXXX exceeded quota for tabledata.list bytes per second per project. For more information, see https://cloud.google.com/bigquery/docs/troubleshoot-quotas
-
-
-Follow these steps to retry the IT job:
-
-#. Cancel the ongoing IT job (if in progress)
-
-#. Comment on `issue #4299`_ with a link to the failed job
-
-#. Purge the queues::
-
-       python scripts/manage_queues.py purge_all
-
-#. Rerun the IT job
-
-.. _`issue #4299`: https://github.com/DataBiosphere/azul/issues/4299
-
-GitHub bot account
-------------------
-
-Continuous integration environments (GitLab, Travis) may need a GitHub token
-to access GitHub's API. To avoid using a personal access token tied to any
-particular developer's account, we created a Google Group called
-``azul-group@ucsc.edu`` of which Hannes is the owner. We then used that group
-email to register a bot account in GitHub. Apparently, that's OK:
-
-    User accounts are intended for humans, but you can give one to a robot,
-    such as a continuous integration bot, if necessary.
-
-    (https://docs.github.com/en/github/getting-started-with-github/types-of-github-accounts#personal-user-accounts)
-
-Only Hannes knows the GitHub password of the bot account, but any member of
-the group can request the password to be reset. All members will receive the
-password reset email. Hannes knows the 2FA recovery codes.
-
-Handing over operator duties
-----------------------------
-
-#. The old operator must finish any merges in progress. The sandbox should be
-   empty; the new operator should inherit a clean slate. This should be done
-   before the first working day of the new operator's shift.
-
-#. The old operator must re-assign `all tickets in the approved column`_ to
-   the new operator.
-
-#. The old operator must re-assign expected indexing failure tickets to the
-   new operator, along with the ticket that tracks operator duties.
-
-#. The new operator must request the necessary permissions, as specified in
-   `Getting started as operator`_.
-
-.. _all tickets in the approved column: https://github.com/DataBiosphere/azul/pulls?q=is%3Apr+is%3Aopen+reviewed-by%3Ahannes-ucsc+review%3Aapproved
diff --git a/UPGRADING.rst b/UPGRADING.rst
deleted file mode 100644
index 1a58d375be..0000000000
--- a/UPGRADING.rst
+++ /dev/null
@@ -1,3004 +0,0 @@
-Upgrading
----------
-
-.. |deprecated| raw:: html
-
-
-.. |end_deprecated| raw:: html
-
-
-This file documents any upgrade procedure that must be performed. Because we
-don't use a semantic version, a change that requires explicit steps to
-upgrade is referenced by its GitHub issue number. After checking out a branch
-that contains a commit resolving an issue listed here, the steps listed
-underneath the issue need to be performed. When switching away from that
-branch, to a branch that does not have the listed changes, the steps would
-need to be reverted. This is all fairly informal and loosely defined.
-Hopefully we won't have too many entries in this file.
-
-
-#6779 Switch ES domain to OpenSearch 2.19
-=========================================
-
-Operator
---------
-
-Before pushing the PR branch to GitLab ``dev`` or ``anvildev``, confirm with
-teammates on Slack that all personal deployments sharing a domain with the
-respective sandbox deployment are idle.
-
-Before pushing these changes to any GitLab instance, the ``null_resource``
-that manages the cluster settings must be tainted. For some reason, Terraform
-does not trigger an update to it even though the domain resource it depends
-on is recreated::
-
-    cd azul
-    . environment
-    _select …
-    _login
-    make -C lambdas indexer service
-    cd terraform
-    make config
-    terraform taint null_resource.cluster_settings
-
-If the deployment is a sandbox, both ``make`` invocations need to be prefixed
-with ``CI_COMMIT_REF_NAME=develop ``.
-
-The ``deploy`` job on GitLab will fail due to a
-``ResourceAlreadyExistsException`` for the OpenSearch domain. This exception
-is raised because this PR changes the Terraform resource type from the
-deprecated ``aws_elasticsearch_domain`` to the recommended
-``aws_opensearch_domain`` without changing the Terraform resource name.
-Terraform does not realize that it can't create one resource while destroying
-the other. Retrying the job will eventually succeed.
-
-Once the deploy job succeeds for ``sandbox`` or ``anvilbox``, notify all team
-members to incorporate these changes into their branches and to redeploy any
-personal deployments that share a domain with these deployments. The changes
-can be incorporated by rebasing a branch on this PR's branch, or by rebasing
-it on develop once these changes land there a few hours later.
-
-
-#7076 Populate schema property in info objects
-==============================================
-
-Operator
---------
-
-Empty out the mirror bucket of any shared deployment prior to running the
-build for a commit that introduces these changes to that deployment.
-
-Everyone else
--------------
-
-Empty out your personal deployments' mirror buckets.
-
-
-#6516 HCA schemas for descriptor lacks drs_uri property
-=======================================================
-
-Using the sandbox deployment's ``environment.py`` as a model, remove from
-your personal deployment's ``environment.py`` the sources removed from the
-``dcp3`` and ``lm2`` catalogs.
-
-
-#6754 Managed access flag is missing from AnVIL deployments
-===========================================================
-
-Using the sandbox deployment's ``environment.py`` as a model, remove the
-``ma`` flag and related logic from your personal deployment's
-``environment.py``.
-
-
-#5736 Update to Python 3.12.x
-=============================
-
-Update Python on your developer machines to version 3.12.7. In your working
-copy, run ``make virtualenv`` and ``make requirements envhook``.
-
-
-#6531 Eliminate RepositoryPlugin.list_partitions
-================================================
-
-The subgraph counts of indexed sources are no longer tracked in the source
-tree. For each of your personal deployments, in ``environment.py``: update
-the ``mksrc`` function, remove the ``subgraphs`` parameter from all of its
-call sites, update the ``prefix`` parameter where it is passed, and remove
-any functions used to construct prefixes, e.g. ``common_prefix()``. Be
-careful to preserve any flags such as ``ma`` or ``pop``. As always, use the
-sandbox deployment's ``environment.py`` as a model when upgrading personal
-deployments.
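-
-After editing ``environment.py``, redeploy the affected personal deployment
-so that the change takes effect. The following is a minimal sketch, using the
-``_select`` and ``make deploy`` commands that appear throughout this file;
-``foo.dev`` is a hypothetical personal deployment name::
-
-    _select foo.dev  # substitute the name of your personal deployment
-    make deploy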
-
-
-#6570 Upgrade dependencies 2024-09-16
-=====================================
-
-Update Python on your developer machines to version 3.11.10. In your working
-copy, run ``make virtualenv`` and ``make requirements envhook``.
-
-
-#6355 Explicitly configure source type in environment files
-===========================================================
-
-The ``mksrc`` function in ``environment.py`` has been updated. For each of
-your personal deployments, update the function and insert the string
-``'bigquery'`` as its first argument at all call sites. As always, use the
-sandbox deployment's ``environment.py`` as a model when upgrading personal
-deployments.
-
-
-#6446 Base image of Azul image is not pinned to digest
-======================================================
-
-Developers on the operator roster
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Install `Skopeo`_. No additional configuration is required. Version 1.15.3
-was tested extensively with these changes. Since then, version 1.16.0 was
-released, and limited testing suggests that it works as well.
-
-.. _Skopeo: https://github.com/containers/skopeo
-
-
-#6468 Upgrade dependencies 2024-08-05
-=====================================
-
-Everyone
-~~~~~~~~
-
-Update Terraform on your developer machines to version 1.9.4.
-
-
-#6426 Clean-up and generalize TDR source specs
-==============================================
-
-The "snapshot/" string has been removed from TDR source specs, and the
-``type`` and ``domain`` fields have been added.
-
-Update the ``mksrc`` function in ``environment.py`` for each of your personal
-deployments. As always, use the sandbox deployment's ``environment.py`` as a
-model when upgrading personal deployments.
-
-
-#6432 Upgrade dependencies 2024-07-22
-=====================================
-
-Everyone
-~~~~~~~~
-
-Update Terraform on your developer machines to version 1.9.3.
-
-
-#6381 Update Terraform to 1.9.x
-===============================
-
-Everyone
-~~~~~~~~
-
-Update Terraform on your developer machines to version 1.9.2.
-
-
-#6239 Promotions fail in hammerbox with dirty requirements
-==========================================================
-
-Remove the variable ``AZUL_IS_SANDBOX`` from all personal deployments.
-
-
-#4655 All bucket names should default to qualified_bucket_name()
-================================================================
-
-Check the value of ``AZUL_S3_BUCKET`` in every personal deployment. If the
-value is either
-
-``edu-ucsc-gi-platform-anvil-dev-storage-{AZUL_DEPLOYMENT_STAGE}.{AWS_DEFAULT_REGION}``
-
-or
-
-``edu-ucsc-gi-platform-hca-dev-storage-{AZUL_DEPLOYMENT_STAGE}.{AWS_DEFAULT_REGION}``
-
-remove the variable from ``environment.py`` for that deployment and redeploy.
-Otherwise, consult with the system administrator.
-
-
-#6218 Delete hammerbox ES domain
-================================
-
-Operator
-~~~~~~~~
-
-Due to an open issue with the `Terraform provider opensearch`_, the
-``deploy`` job will fail on ``hammerbox`` when building the feature branch.
-After this occurs, run the following commands::
-
-    _select hammerbox
-    cd $project_root/terraform
-    terraform state rm opensearch_cluster_settings.index
-
-Then, retry the ``deploy`` job on GitLab. It should now succeed.
-
-.. _Terraform provider opensearch: https://github.com/opensearch-project/terraform-provider-opensearch/issues/60
-
-
-DataBiosphere/azul-private#6 data-browser: Content Security Policy (CSP) Not Implemented
-========================================================================================
-
-The new environment variable ``AZUL_TERRA_SERVICE_URL`` has been added. As
-always, use the sandbox deployment's ``environment.py`` as a model when
-upgrading personal deployments.
-
-
-DataBiosphere/azul-private#133 Disable split tunneling for GitLab VPN in prod and anvilprod
-===========================================================================================
-
-This change requires an update to your existing VPN connections for ``prod``
-and ``anvilprod``.
-
-Run the following commands::
-
-    _select prod.gitlab # or anvilprod.gitlab
-    cd terraform/gitlab/vpn
-    make config > ~/azul-gitlab-prod.ovpn # or azul-gitlab-anvilprod.ovpn
-
-Then, remove the existing VPN connection and import the generated ``.ovpn``
-file to recreate it. Finally, delete the ``.ovpn`` file to prevent
-proliferation of the private key.
-
-
-#6046 Fix: VPC CIDR in ``anvildev`` is wrong
-============================================
-
-Operator
-~~~~~~~~
-
-Before deploying ``anvildev.gitlab``, ask team members to checkout
-``develop`` and to run the following commands in every one of their personal
-deployments collocated with ``anvildev``::
-
-    _select …
-    make deploy
-    cd terraform
-    terraform plan -out destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan -destroy -target={aws_lambda_function.{indexer{,{_aggregate,_contribute}{,_retry},_indexercachehealth},service{,_manifest,_servicecachehealth}},aws_security_group.{indexer,service}}
-    terraform apply destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan
-
-This will destroy the VPC-dependent resources in their deployment, and should
-allow for the destruction of the VPC in the next step. Ask team members to
-confirm the completion of this step. After receiving confirmation from every
-team member, checkout the PR branch and run the following commands::
-
-    _select anvilbox
-    CI_COMMIT_REF_NAME=develop make deploy
-    cd terraform
-    terraform plan -out destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan -destroy -target={aws_security_group.{elasticsearch,indexer,service},aws_elasticsearch_domain.index}
-    terraform apply destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan
-    cd ..
-
-Among the resources the above command destroys is the Elasticsearch domain
-that hosts the indices for the ``anvilbox`` deployment and any personal
-deployments sharing the domain with the ``anvilbox`` deployment.
-
-Repeat this for ``anvildev``.
-
-Deploy the ``gitlab`` component::
-
-    _select anvildev.gitlab
-    CI_COMMIT_REF_NAME=develop make -C terraform/gitlab
-
-This will destroy and recreate many more resources. It will most likely fail
-at some point, either because of a missing dependency declaration in our TF
-config or a bug in the Terraform AWS provider or in Terraform core. Manually
-delete any resource mentioned in any error messages and retry the command.
-Once the command completes successfully, ensure that the GitLab web
-application is functional.
-
-After successfully deploying the ``gitlab`` component, continue with the PR
-checklist.
Once the sandbox build succeeds, ask team members to checkout -``develop`` and to run the following commands in every one of their personal -deployments collocated with ``anvildev``:: - - _select … - make deploy - make reindex - -This will recreate their VPC-dependent resources previously destroyed and -repopulate their indices on the ``anvilbox`` domain. - -Complete the PR checklist. - - -#6047 Fix: VPC CIDR in ``anvilprod`` is wrong -============================================= - -Operator -~~~~~~~~ - -Before deploying ``anvilprod.gitlab`` ask team members to checkout ``develop`` -and to run the following commands in every one of their personal deployments -collocated with ``anvilprod``:: - - _select … - make deploy - cd terraform - terraform plan -out destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan -destroy -target={aws_lambda_function.{indexer{,{_aggregate,_contribute}{,_retry},_indexercachehealth},service{,_manifest,_servicecachehealth}},aws_security_group.{indexer,service}} - terraform apply destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan - -This will destroy the VPC-dependent resources in their deployment, and should -allow for the destruction of the VPC in the next step. Ask team members to -confirm the completion of this step. After receiving confirmation from every -team member, checkout the PR branch and run the following commands:: - - _select hammerbox - CI_COMMIT_REF_NAME=develop make deploy - cd terraform - terraform plan -out destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan -destroy -target={aws_security_group.{elasticsearch,indexer,service},aws_elasticsearch_domain.index} - terraform apply destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan - cd .. - -Among the resources the above command destroys is the Elasticsearch domain that -hosts the indices for the ``hammerbox`` deployment and any personal deployments -sharing the domain with the ``hammerbox`` deployment. - -Repeat this for ``anvilprod``. - -Deploy the ``gitlab`` component:: - - _select anvilprod.gitlab - CI_COMMIT_REF_NAME=develop make -C terraform/gitlab - -This will destroy and recreate many more resources. It will most likely fail at -some point, either because of a missing dependency declaration in our TF config -or a bug in the Terraform AWS provider or in Terraform core. Manually delete any -resource mentioned in any error messages and retry the command. Once the command -completes successfully, ensure that the GitLab web application is functional. - -After successfully deploying the ``gitlab`` component, continue with the PR -checklist. Once the sandbox build succeeds, ask team members to checkout -``develop`` and to run the following commands in every one of their personal -deployments collocated with ``anvilprod``:: - - _select … - make deploy - make reindex - -This will recreate their VPC-dependent resources previously destroyed and -repopulate their indices on the ``hammerbox`` domain. - -Complete the PR checklist. - - -#6048 Fix: VPC CIDR in ``prod`` is wrong -======================================== - -Operator -~~~~~~~~ - -Checkout the PR branch and run the following commands:: - - _select prod - CI_COMMIT_REF_NAME=prod make deploy - cd terraform - terraform plan -out destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan -destroy -target={aws_security_group.{elasticsearch,indexer,service},aws_elasticsearch_domain.index} - terraform apply destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan - cd .. - -Deploy the ``gitlab`` component:: - - _select prod.gitlab - CI_COMMIT_REF_NAME=prod make -C terraform/gitlab - -This will destroy and recreate many resources. 
It will most likely fail at some -point, either because of a missing dependency declaration in our TF config or a -bug in the Terraform AWS provider or in Terraform core. Manually delete any -resource mentioned in any error messages and retry the command. Once the command -completes successfully, ensure that the GitLab web application is functional. - -After successfully deploying the ``gitlab`` component, complete the PR -checklist. - - -#6045 Fix: VPC CIDR in ``dev`` is wrong -======================================= - -Operator -~~~~~~~~ - -Before deploying ``dev.gitlab`` ask team members to checkout ``develop`` and to -run the following commands in every one of their personal deployments collocated -with ``dev``:: - - _select … - make deploy - cd terraform - terraform plan -out destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan -destroy -target={aws_lambda_function.{indexer{,{_aggregate,_contribute}{,_retry},_indexercachehealth},service{,_manifest,_servicecachehealth}},aws_security_group.{indexer,service}} - terraform apply destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan - -This will destroy the VPC-dependent resources in their deployment, and should -allow for the destruction of the VPC in the next step. Ask team members to -confirm the completion of this step. After receiving confirmation from every -team member, checkout the PR branch and run the following commands:: - - _select sandbox - CI_COMMIT_REF_NAME=develop make deploy - cd terraform - terraform plan -out destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan -destroy -target={aws_security_group.{elasticsearch,indexer,service},aws_elasticsearch_domain.index} - terraform apply destroy_${AZUL_DEPLOYMENT_STAGE}.tfplan - cd .. - -Among the resources the above command destroys is the Elasticsearch domain that -hosts the indices for the ``sandbox`` deployment and any personal deployments -sharing the domain with the ``sandbox`` deployment. - -Repeat this for `dev`. - -Deploy the ``gitlab`` component:: - - _select dev.gitlab - CI_COMMIT_REF_NAME=develop make -C terraform/gitlab - -This will destroy and recreate many more resources. It will most likely fail at -some point, either because of a missing dependency declaration in our TF config -or a bug in the Terraform AWS provider or in Terraform core. Manually delete any -resource mentioned in any error messages and retry the command. Once the command -completes successfully, ensure that the GitLab web application is functional. - -After successfully deploying the ``gitlab`` component, continue with the PR -checklist. Once the sandbox build succeeds, ask team members to checkout -``develop`` and to run the following commands in every one of their personal -deployments collocated with ``dev``:: - - _select … - make deploy - make reindex - -This will recreate their VPC-dependent resources previously destroyed and -repopulate their indices on the ``sandbox`` domain. - -Complete the PR checklist. - - -#5964 SSM Agent in GitLab lacks sufficient permissions in its role -================================================================== - -Operator -~~~~~~~~ - -Manually deploy the ``gitlab`` component of any main deployment just before -pushing the merge commit to the GitLab instance in that deployment. - - -#5984 AWS event GetMacieSession results in AccessDenied error again -=================================================================== - -Operator -~~~~~~~~ - -Manually deploy the ``shared`` component of any main deployment just before -pushing the merge commit to the GitLab instance in that deployment. 
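-
-A minimal sketch of such a manual deployment of the ``shared`` component,
-assuming it is selected and built analogously to the ``gitlab`` component
-used elsewhere in this file (the ``terraform/shared`` path and the
-``anvildev`` deployment name are illustrative)::
-
-    _select anvildev.shared
-    make -C terraform/shared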
- - -#5970 Upgrade dependencies 2024-02-19 -===================================== - -Operator -~~~~~~~~ - -Manually perform a two-phase deployment of the ``shared`` component of every -main deployment. Perform the first phase using the ``apply_keep_unused`` -Makefile target for the lower deployments after pushing the squashed and rebased -PR branch to GitHub. In a stable deployment (``prod``), perform the first phase -before pushing the merge commit to the GitLab instance in that deployment. In -lower and stable deployments, perform the second phase using the ``apply`` -Makefile target after the merge commit was successfully built on the GitLab -instance in that deployment. - -Deploy the ``gitlab`` component of any main deployment just after pushing the PR -branch to GitHub. Run ``make -C terraform/gitlab/runner`` just before pushing -the merge commit to the GitLab instance in that deployment. - - -#3895 Setup CloudWatch alarm for ClamAV notifications -===================================================== - -Operator -~~~~~~~~ - -Manually deploy the ``shared`` and ``gitlab`` components of any main deployment -just before pushing the merge commit to the GitLab instance in that deployment. - - -#5975 Upgrade ES domain for Hammerbox -===================================== - -Update the ``ES_INSTANCE_COUNT`` environment variable for any personal -deployments that are co-located with ``hammerbox``, using that deployment's -``environment.py`` as a template. - - -#5876 Upgrade dependencies 2024-02-05 -===================================== - -Everyone -~~~~~~~~ - -Update Python on your developer machines to version 3.11.8. In your working -copy, run ``make virtualenv`` and ``make requirements envhook``. - -Operator -~~~~~~~~ - -Manually perform a two-phase deployment of the ``shared`` component of every -main deployment. Perform the first phase using the ``apply_keep_unused`` -Makefile target for the lower deployments after pushing the squashed and rebased -PR branch to GitHub. In a stable deployment (``prod``), perform the first phase -before pushing the merge commit to the GitLab instance in that deployment. In -lower and stable deployments, perform the second phase using the ``apply`` -Makefile target after the merge commit was successfully built on the GitLab -instance in that deployment. - -Deploy the ``gitlab`` component of any main deployment just after pushing the PR -branch to GitHub. Run ``make -C terraform/gitlab/runner`` just before pushing -the merge commit to the GitLab instance in that deployment. - - -#5728 Many stale images in gitlab-dind and GitLab registry -========================================================== - -Operator -~~~~~~~~ - -Manually deploy the ``gitlab`` component of any main deployment just before -pushing the merge commit to the GitLab instance in that deployment. - - -#4593 Refer to Docker images by repository digest -================================================= - -Operator -~~~~~~~~ - -Manually perform a two-phase deployment of the ``shared`` component of every -main deployment. Perform the first phase using the ``apply_keep_unused`` -Makefile target for the lower deployments after pushing the squashed and rebased -PR branch to GitHub. In a stable deployment (``prod``), perform the first phase -before pushing the merge commit to the GitLab instance in that deployment. In -lower and stable deployments, perform the second phase using the ``apply`` -Makefile target after the merge commit was successfully built on the GitLab -instance in that deployment. 
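-
-A minimal sketch of such a two-phase deployment, assuming the ``shared``
-component of ``dev`` is selected as ``dev.shared`` and managed under
-``terraform/shared``, by analogy with the ``gitlab`` component used elsewhere
-in this file; the two Makefile targets are the ones named above::
-
-    _select dev.shared
-    # phase one, e.g. after pushing the squashed and rebased PR branch:
-    make -C terraform/shared apply_keep_unused
-    # phase two, after the merge commit built successfully on GitLab:
-    make -C terraform/shared apply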
-
-
-#5467 Lambda servicecachehealth times out
-=========================================
-
-Operator
-~~~~~~~~
-
-Manually deploy the ``gitlab`` component of any main deployment just before
-pushing the merge commit to the GitLab instance in that deployment.
-
-
-#5876 Upgrade dependencies 2024-01-22
-=====================================
-
-Everyone
-~~~~~~~~
-
-Update Terraform on your developer machines to version 1.6.6.
-
-Operator
-~~~~~~~~
-
-Manually perform a two-phase deployment of the ``shared`` component of every
-main deployment. Perform the first phase using the ``apply_keep_unused``
-Makefile target for the lower deployments after pushing the squashed and
-rebased PR branch to GitHub. In a stable deployment (``prod``), perform the
-first phase before pushing the merge commit to the GitLab instance in that
-deployment. In lower and stable deployments, perform the second phase using
-the ``apply`` Makefile target after the merge commit was successfully built
-on the GitLab instance in that deployment.
-
-Deploy the ``gitlab`` component of any main deployment just after pushing the
-PR branch to GitHub. Run ``make -C terraform/gitlab/runner`` just before
-pushing the merge commit to the GitLab instance in that deployment.
-
-
-#5779 AWS event GetMacieSession results in AccessDenied error
-=============================================================
-
-Operator
-~~~~~~~~
-
-Manually deploy the ``shared`` component of any main deployment just before
-pushing the merge commit to the GitLab instance in that deployment.
-
-
-#5809 Concurrently running GitLab test jobs occasionally time out
-=================================================================
-
-Operator
-~~~~~~~~
-
-Manually deploy the ``gitlab`` component of any main deployment just before
-pushing the merge commit to the GitLab instance in that deployment.
-
-
-#5738 Suppress Inspector findings for images not used within boundary
-=====================================================================
-
-Operator
-~~~~~~~~
-
-Manually deploy the ``shared`` component of any main deployment just before
-pushing the merge commit to the GitLab instance in that deployment.
-
-
-#5848 Dummy S3 lifecycle rule has auto-generated name
-=====================================================
-
-Operator
-~~~~~~~~
-
-Manually deploy the ``shared`` component of any main deployment just before
-pushing the PR branch to the GitLab instance in that deployment.
-
-
-#5847 Subgraph stitching query crashes BQ emulator
-==================================================
-
-Operator
-~~~~~~~~
-
-Manually perform a two-phase deployment of the ``shared`` component of every
-main deployment. In a lower deployment, perform the first phase using the
-``apply_keep_unused`` Makefile target just before pushing the PR branch to
-the GitLab instance in that deployment. In a stable deployment (``prod``),
-perform the first phase before pushing the merge commit to the GitLab
-instance in that deployment. In lower and stable deployments, perform the
-second phase using the ``apply`` Makefile target after the merge commit was
-successfully built on the GitLab instance in that deployment.
-
-
-#5687 Update Terraform to 1.6.x
-===============================
-
-Everyone
-~~~~~~~~
-
-Update Terraform on your developer machines to version 1.6.5.
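-
-To confirm that the upgrade took effect, one may run::
-
-    terraform version
-
-and check that the reported version is 1.6.5.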


#5046 Replace tinyquery with bigquery-emulator
==============================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the PR branch to the GitLab instance in that deployment.


#5772 Upgrade dependencies 2023-12-11
=====================================

Everyone
~~~~~~~~

Update Python on your developer machines to version 3.11.7. In your working
copy, run ``make virtualenv`` and ``make requirements envhook``.

Operator
~~~~~~~~

Before pushing the PR branch to the ``sandbox``, ``anvilbox``, or ``hammerbox``
deployments, manually deploy the ``shared`` component of the corresponding main
deployment. If the PR fails during testing and is not merged, roll back the
changes made to the main deployments by deploying the ``shared`` component from
the ``develop`` branch.

When deploying to ``prod``, manually deploy ``prod.shared`` just before
pushing the merge commit to the GitLab instance.

Deploy the ``gitlab`` component of any main deployment, and run
``make -C terraform/gitlab/runner``, just before pushing the merge commit to
the GitLab instance in that deployment.


#5728 Many stale images in gitlab-dind and GitLab registry
==========================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5723 Upgrade dependencies 2023-11-27
=====================================

Everyone
~~~~~~~~

Update Python on your developer machines to version 3.11.6. In your working
copy, run ``make virtualenv`` and ``make requirements envhook``.

Update Terraform on your developer machines to version 1.3.10.

Operator
~~~~~~~~

Before pushing the PR branch to the ``sandbox``, ``anvilbox``, or ``hammerbox``
deployments, manually deploy the ``shared`` component of the corresponding main
deployment. If the PR fails during testing and is not merged, roll back the
changes made to the main deployments by deploying the ``shared`` component from
the ``develop`` branch.

When deploying to ``prod``, manually deploy ``prod.shared`` just before
pushing the merge commit to the GitLab instance.

Deploy the ``gitlab`` component of any main deployment, and run
``make -C terraform/gitlab/runner``, just before pushing the merge commit to
the GitLab instance in that deployment.


#5536 Timebox DRS requests
==========================

The ``AZUL_TERRA_TIMEOUT`` environment variable should be removed from any
``environment.py`` files for personal deployments that define it (uncommon).


#5703 Consolidate dependency updates into single bi-weekly issue
================================================================

Operator
~~~~~~~~

Run ``make -C terraform/gitlab/runner`` with the ``gitlab`` component of every
main deployment selected just before pushing the PR branch to the GitLab
instance in that deployment. If the PR has to be sent back, check out
``develop`` and run that command again in all deployments where it was run with
the PR branch checked out.

Deploy the ``shared`` component of any main deployment just before pushing the
PR branch to the GitLab instance in that deployment. Do so with the PR branch
checked out. You will need to use the ``CI_COMMIT_REF_NAME=develop`` override
for that.
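
As a sketch, and assuming the ``dev`` main deployment, that override is used
like this::

    _select dev.shared
    CI_COMMIT_REF_NAME=develop make -C terraform/shared apply
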
Notify team members that their local development work will be impacted until
they rebase their branches onto the PR branch or until this PR is merged and
they rebase their branches onto ``develop``. If the PR has to be sent back,
check out ``develop`` and deploy the ``shared`` component again in any
deployment where it was deployed with the PR branch checked out, and notify the
developers to rebase their branches onto ``develop`` again.

Deploy the ``gitlab`` component of any main deployment just before pushing the
merge commit to the GitLab instance in that deployment.


#5561 Explicitly set no lifecycle rule in TF for buckets without one
====================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5617 False positive AWS Inspector findings after GitLab deploy
===============================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5612 AWS Inspector fails to post findings to SNS topic
=======================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


DataBiosphere/azul-private#110 Reduce predictability of manifest keys
=====================================================================

Operator
~~~~~~~~

Before pushing the PR branch to the ``sandbox``, ``anvilbox``, or ``hammerbox``
deployments, manually deploy the ``gitlab`` component of the corresponding main
deployment. You will likely need assistance from the system administrator
because this particular change modifies the boundary policy. If the PR fails
during testing and is not merged, roll back the changes made to the main
deployments by deploying the ``gitlab`` component from the ``develop`` branch.

When deploying to ``prod``, manually deploy ``prod.gitlab`` just before
pushing the merge commit to the GitLab instance.


#4982 Update to Python 3.11.x
=============================

Everyone
~~~~~~~~

Update Python on your developer machines to version 3.11.5. In your working
copy, run ``make virtualenv`` and ``make requirements envhook``.

Operator
~~~~~~~~

Before pushing the PR branch to the ``sandbox``, ``anvilbox``, or ``hammerbox``
deployments, manually deploy the ``shared`` component of the corresponding main
deployment. If the PR fails during testing and is not merged, roll back the
changes made to the main deployments by deploying the ``shared`` component from
the ``develop`` branch.

When deploying to ``prod``, manually deploy ``prod.gitlab`` just before
pushing the merge commit to the GitLab instance.


#5518 GitLab updates cause false positive insufficient_data alarms
==================================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment.
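
For example, a minimal sketch of such a manual deployment, assuming the ``dev``
main deployment::

    _select dev.gitlab
    make -C terraform/gitlab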


DataBiosphere/azul-private#108 Resolve vulnerabilities in docker image
======================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` & ``gitlab`` components (in that order) of any
main deployment, and with the ``gitlab`` component selected, run ``make -C
terraform/gitlab/runner`` just before pushing the merge commit to the GitLab
instance in that deployment.


DataBiosphere/azul-private#103 Resolve vulnerabilities in azul-pycharm
======================================================================

Operator
~~~~~~~~

Before pushing the PR branch to the ``sandbox``, ``anvilbox``, or ``hammerbox``
deployments, manually deploy the ``shared`` component of the corresponding main
deployment. If the PR fails during testing and is not merged, roll back the
changes made to the main deployments by deploying the ``shared`` component from
the ``develop`` branch.

When deploying to ``prod``, manually deploy ``prod.gitlab`` just before
pushing the merge commit to the GitLab instance.


DataBiosphere/azul-private#93 Resolve vulnerabilities in azul-elasticsearch
===========================================================================

Operator
~~~~~~~~

Before pushing the PR branch to the ``sandbox``, ``anvilbox``, or ``hammerbox``
deployments, manually deploy the ``shared`` component of the corresponding main
deployment. If the PR fails during testing and is not merged, roll back the
changes made to the main deployments by deploying the ``shared`` component from
the ``develop`` branch.

When deploying to ``prod``, manually deploy ``prod.shared`` just before
pushing the merge commit to the GitLab instance.


DataBiosphere/azul-private#94 Resolve vulnerabilities in azul-pycharm
=====================================================================

Operator
~~~~~~~~

Before pushing the PR branch to the ``sandbox``, ``anvilbox``, or ``hammerbox``
deployments, manually deploy the ``shared`` component of the corresponding main
deployment. If the PR fails during testing and is not merged, roll back the
changes made to the main deployments by deploying the ``shared`` component from
the ``develop`` branch.

When deploying to ``prod``, manually deploy ``prod.gitlab`` just before
pushing the merge commit to the GitLab instance.


#5301 Alarm on detection of new vulnerabilities by Inspector
============================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` and ``gitlab`` components (in that order) of any
main deployment just before pushing the merge commit to the GitLab instance in
that deployment.


#5518 GitLab updates cause false positive insufficient_data alarms
==================================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment.


#5552 Increase retention of non-current object versions in shared bucket
========================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


DataBiosphere/azul-private#15 Insecure Transportation Security Protocol Supported (TLS 1.0)
===========================================================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment.


#5189 Delete unused Docker images from ECR
==========================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment. Retain a
terminal transcript for each deployment so that the author can diagnose any
issues that may come up.


#4468 Logs by different containers are hard to distinguish
==========================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5408 Prepare for vacation
==========================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment. Only the
``prod.gitlab`` deployment should actually have a non-empty plan.


DataBiosphere/azul-private#95 Resolve vulnerabilities in AMI for GitLab
=======================================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment.


#5301 Alarm on detection of new vulnerabilities by Inspector
============================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5363 Noisy alarm from EC2 for CreateNetworkInterface during initial deploy
===========================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5408 Prepare for vacation
==========================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment.


#5139 CloudWatch metrics and alarms for GitLab EC2 instance
===========================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment.


#5155 Update AnVIL catalogs in anvilprod with replacement snapshots
===================================================================

Update the snapshots for any personal deployments that share an Elasticsearch
domain with ``hammerbox``, using that deployment's ``environment.py`` as a
template.


#5413 Make anvildev and anvilbox public
=======================================

Operator
~~~~~~~~

The ``deploy`` job will fail for ``anvildev`` when building the merge commit on
the ``develop`` branch. It may also fail for ``anvilbox`` when building the
feature branch.
The expected failure produces the following output::

    ╷
    │ Error: updating REST API (1yxdxpa3db): BadRequestException: Cannot update endpoint from PRIVATE to EDGE
    │
    │ with aws_api_gateway_rest_api.indexer,
    │ on api_gateway.tf.json line 862, in resource[6].aws_api_gateway_rest_api[0].indexer:
    │ 862: }
    │
    ╵
    ╷
    │ Error: updating REST API (pmmwi1i8la): BadRequestException: Cannot update endpoint from PRIVATE to EDGE
    │
    │ with aws_api_gateway_rest_api.service,
    │ on api_gateway.tf.json line 1467, in resource[24].aws_api_gateway_rest_api[0].service:
    │ 1467: }
    │
    ╵

To work around this, check out the respective branch and perform the commands
below. If you have the feature branch checked out, you will need to prefix the
``make`` invocations with ``CI_COMMIT_REF_NAME=develop``. ::

    make lambdas
    cd terraform
    make validate
    terraform taint aws_api_gateway_rest_api.indexer
    terraform taint aws_api_gateway_rest_api.service

Retry the ``deploy`` job on GitLab. It should succeed now. If the subsequent
``integration_test`` job fails with 403 or 503 errors returned by the service
or indexer, simply retry it. It appears that the edge distribution process in
AWS is subject to several minutes of latency, a.k.a. eventual consistency.


#5292 Update/harden docker.elastic.co/elasticsearch/elasticsearch
=================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5407 False positive for unauthorized alarm from MandoService
=============================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5298 Keep docker Docker images updated
=======================================

Operator
~~~~~~~~

Manually deploy the ``shared`` & ``gitlab`` components (in that order) of any
main deployment, and with the ``gitlab`` component selected, run ``make -C
terraform/gitlab/runner`` just before pushing the merge commit to the GitLab
instance in that deployment.


#5400 Make anvilprod public
===========================

Operator
~~~~~~~~

The ``deploy`` job will fail for ``anvilprod`` when building the merge commit
on the ``develop`` branch. It may also fail for ``hammerbox`` when building the
feature branch. The expected failure produces the following output::

    ╷
    │ Error: updating REST API (1yxdxpa3db): BadRequestException: Cannot update endpoint from PRIVATE to EDGE
    │
    │ with aws_api_gateway_rest_api.indexer,
    │ on api_gateway.tf.json line 862, in resource[6].aws_api_gateway_rest_api[0].indexer:
    │ 862: }
    │
    ╵
    ╷
    │ Error: updating REST API (pmmwi1i8la): BadRequestException: Cannot update endpoint from PRIVATE to EDGE
    │
    │ with aws_api_gateway_rest_api.service,
    │ on api_gateway.tf.json line 1467, in resource[24].aws_api_gateway_rest_api[0].service:
    │ 1467: }
    │
    ╵

To work around this, check out the respective branch and perform the commands
below. If you have the feature branch checked out, you will need to prefix the
``make`` invocations with ``CI_COMMIT_REF_NAME=develop``. ::

    make lambdas
    cd terraform
    make validate
    terraform taint aws_api_gateway_rest_api.indexer
    terraform taint aws_api_gateway_rest_api.service

Retry the ``deploy`` job on GitLab. It should succeed now.
If the subsequent ``integration_test`` job fails with 403 or 503 errors
returned by the service or indexer, simply retry it. It appears that the edge
distribution process in AWS is subject to several minutes of latency, a.k.a.
eventual consistency.


#5189 Delete unused Docker images from ECR
==========================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5291 Suppress unauthorized alarms for visiting Inspector console
=================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5299 Keep Python updated
=========================

Everyone
~~~~~~~~

Update Python on your developer machines to version 3.9.17.

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5289 Fix: _select doesn't validate its argument
================================================

Set the environment variable ``azul_google_user`` in all deployments to your
``…@ucsc.edu`` email address. The easiest way to do that is in an
``environment.local.py`` at the project root.

Many of the shell functions defined in ``environment`` have been renamed. To
avoid stale copies of these functions lingering around under their old names,
exit all shells in which you sourced that file.


#5325 Exclude noisy events from api_unauthorized alarm
======================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5280 Enable FIPS mode on GitLab instance
=========================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, just before
pushing the merge commit to the GitLab instance in that deployment.


#5283 Swap anvilprod and anvildev
=================================

Update any personal deployments you own in AWS account ``platform-anvil-dev``
to mirror the configuration of the ``anvilbox`` deployment. Specifically, you
will need to update the list of sources for the ``anvil`` catalog and the TDR
and SAM endpoints. You will also need to ask the system administrator to move
the Terra group membership of the indexer service account of any such personal
deployment from ``azul-anvil-prod`` in Terra production to ``azul-anvil-dev``
in TDR development. Redeploy and reindex those deployments after updating their
configuration.

All indices in the Elasticsearch domains for ``anvildev`` and ``anvilbox`` have
been deleted, including the indices of personal deployments that share an
Elasticsearch domain with ``anvilbox``, regardless of whether these indices
contained managed-access or public snapshots. In order to recover from the loss
of these indices in your personal deployment, you will need to reindex that
deployment.
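
A sketch of that recovery, with ``jane`` standing in for a hypothetical
personal deployment::

    _select jane
    make reindex   # repopulate the deleted indices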


#5260 Fix: Inconsistent bucket names and CloudFront origin IDs in anvildev
==========================================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of the ``anvildev`` deployment just
before pushing the merge commit to the GitLab instance in that deployment. When
the ``deploy_browser`` job of the ``deploy`` stage fails on GitLab, manually
empty and delete the S3 buckets ``anvil.explorer.gi.ucsc.edu`` and
``anvil.gi.ucsc.edu`` in ``platform-anvil-dev``. Retry the job.


#5226 Sporadic DNS resolution errors on GitLab
==============================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5232 Fix: Operators should have SSH access to anvildev and anvilprod
=====================================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment, except
``prod``, just before pushing the merge commit to the GitLab instance in that
deployment.


#5015 Prepare platform-anvil-prod for compliance assessment
===========================================================

Everyone
~~~~~~~~

Update Python on your developer machines to version 3.9.16.

Create a `personal access token`_ on every GitLab instance you have access to
and specify that token as the value of the ``azul_gitlab_access_token``
variable in your ``environment.local.py`` for the main deployment collocated
with that instance. See the documentation of that variable in the top-level
``environment.py`` for the set of scopes (permissions) to be assigned to the
token. Refresh the environment and run ``_preauth``.

.. _personal access token: https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html

Operator
~~~~~~~~

Follow the steps for everyone listed above.

Just before pushing the feature branch to a GitLab instance, locally merge the
feature branch into ``develop`` — without pushing the resulting merge commit —
and deploy the merge commit to the ``shared`` & ``gitlab`` components (in that
order) of the main deployment for that GitLab instance. When the PR cannot be
merged for any reason, undo the merge locally by resetting the ``develop``
branch to the prior commit and manually deploy the ``develop`` branch to the
``shared`` & ``gitlab`` components (in that order) of the main deployment for
that GitLab instance.

If deploying the ``gitlab`` component results in an ``OptInRequired`` error,
log in to the AWS Console using credentials for the AWS account that contains
the GitLab instance and visit the URL that is included in the error message.
This will enable the required AWS Marketplace subscription for the CIS-hardened
image.

With the ``gitlab`` component selected, run ``make -C terraform/gitlab/runner``.


#3894 Send GitLab host logs to CloudWatch
=========================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5207 Fix: Partition sizing ignores supplementary bundles
=========================================================

Subgraph counts have been updated for ``anvildev`` and ``anvilbox``. If you
have any personal deployments that index these snapshots, update the subgraph
counts accordingly.


#4022 Encrypt GitLab data and root volume and snapshots
=======================================================

Operator
~~~~~~~~

Prior to pushing the merge commit to a GitLab instance, log in to the AWS
Console and navigate to ``EC2`` -> ``Instances`` -> select the GitLab instance
-> ``Storage`` to confirm that the root volume is encrypted.

If the root volume is not encrypted, manually deploy the ``gitlab`` component
of a deployment just before pushing the merge commit to the GitLab instance in
that deployment.


#5043 S3 server access logs are inherently incomplete
=====================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5133 Trigger an alarm on absence of logs
=========================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5110 Update GitLab IAM policy for FedRAMP inventory
====================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4218 Configure WAF with rules
==============================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment immediately
before the first time this change is pushed to the GitLab instance for that
main deployment, regardless of whether the changes come as part of a feature
branch, a merge commit, or a promotion.


#3911 Disallow ``||`` joiners in metadata
=========================================

A new catalog ``dcp3`` has been added to the ``dev`` and ``sandbox``
deployments. Add the ``dcp3`` catalog to your personal deployments using the
sandbox deployment's ``environment.py`` as a model.


#5116 Enable NIST 800.53 conformance pack for AWS Config
========================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4713 S3 Block Public Access setting should be enabled
======================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#5071 s3_access_log_bucket_policy includes redundant condition on source account
================================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4960 S3 server access logging for shared bucket
================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4189 Scan GitLab EC2 instance with Amazon Inspector
====================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just *before*
pushing the merge commit to the GitLab instance in that deployment.
The Terraform code that enables Amazon Inspector is currently unreliable. Check
the Amazon Inspector console to see if it is enabled. If you see a *Get
started …* button, it is not, and you need to repeat this step.


#5019 Index public & mock-MA snapshots in anvilprod
===================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just *before*
pushing the merge commit to the GitLab instance in that deployment.


#3634 Automate creation of a FedRAMP Integrated Inventory Workbook
==================================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment just *before*
pushing the merge commit to the GitLab instance in that deployment.

Afterwards, edit the existing schedule in the Azul project on that GitLab
instance. Its description is ``Sell unused BigQuery slot commitments``. You may
need to ask a system administrator to make these changes on your behalf.

1) Set the Cron timezone to ``Pacific Time (US & Canada)``

2) Set the variable ``azul_gitlab_schedule`` to ``sell_unused_slots``

Add another schedule:

1) Set the description to ``Prepare FedRAMP inventory``

2) Set the interval pattern to ``0 4 * * *``

3) Set the Cron timezone to ``Pacific Time (US & Canada)``

4) Set the variable ``azul_gitlab_schedule`` to ``fedramp_inventory``


#5004 Enable access logging on AWS Config bucket
================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4176 Enable VPC flow logs
==========================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` and ``shared`` components of any main deployment
just before pushing the merge commit to the GitLab instance in that deployment.


#4918 Rename shared (aka versioned aka config) bucket (PR 2 of 2)
=================================================================

This change removes the old shared (aka versioned aka config) bucket and
switches all deployments to the replacement.

Everyone
~~~~~~~~

When requested by the operator, remove the ``AZUL_VERSIONED_BUCKET`` variable
from all of your personal deployments, then deploy this change to all of them.
Notify the operator when done.

Operator
~~~~~~~~

1. After pushing the merge commit for this change to ``develop`` on GitHub,
   request that team members upgrade their personal deployments. Request that
   team members report back when done.

2. Manually deploy the ``gitlab`` component of any main deployment just
   *before* pushing the merge commit to the GitLab instance in that deployment.

3. Manually deploy the ``shared`` component of any main deployment just *after*
   this change was deployed to all collocated deployments, both personal and
   shared ones.

Promote this change separately from the previous one, and when promoting it,
follow steps 2 and 3 above.


#4918 Rename shared (aka versioned aka config) bucket (PR 1 of 2)
=================================================================

This change creates the new bucket with the correct name, sets up replication
between the old and the new bucket so that future object versions are copied,
and runs a batch migration of prior and current object versions. The next PR
will actually switch all deployments to using the new bucket.

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4966 Chatbot role policy is too restrictive and causes persistent alarms
=========================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4958 Storage bucket is still being removed from TF state
=========================================================

Everyone
~~~~~~~~

PR #4926 for issue #4646 left in place code to remove the S3 storage bucket
from the Terraform state. We'll refer to the changes from that PR as *broken*
and the changes for #4958 described here as *this fix*. The broken upgrading
instructions have been deprecated. When you follow these instructions, be
sure you have this fix checked out, or a commit that includes it.

There are three possible cases to consider when upgrading a deployment. Pick
the one applicable to the deployment being upgraded and only follow the steps
listed under that case:

Case A:
    If you have already deployed the broken changes once, and have not yet
    attempted to deploy again, verify that ::

        (cd terraform && make init && terraform state show aws_s3_bucket.storage)

    produces output that includes the following lines::

        # aws_s3_bucket.storage:
        resource "aws_s3_bucket" "storage" {

    Then deploy this fix.

Case B:
    If you have already deployed the broken changes, and then attempted to
    deploy them again, the affected deployment needs to be repaired. A symptom
    of the breakage is that the command ::

        (cd terraform && make init && terraform state show aws_s3_bucket.storage)

    fails with the message *No instance found for the given address*.

    To repair the deployment, run ::

        (cd terraform && make validate && terraform import aws_s3_bucket.storage $AZUL_S3_BUCKET)

    Then deploy this fix. Afterwards, confirm that ::

        (cd terraform && make init && terraform state show aws_s3_bucket.storage)

    produces no error but instead output that includes the following lines::

        # aws_s3_bucket.storage:
        resource "aws_s3_bucket" "storage" {

Case C:
    If you have *not* yet deployed the broken changes, first run the following
    command::

        (cd terraform && make init && terraform state rm aws_s3_bucket.storage)

    This will cause Terraform to leave the old bucket in place when you
    deploy this fix, and create a new one alongside it.

    Next, in personal deployments only, specify a name for the new bucket by
    changing the value of ``AZUL_S3_BUCKET`` in ``environment.py`` to ::

        "edu-ucsc-gi-{account}-storage-{AZUL_DEPLOYMENT_STAGE}.{AWS_DEFAULT_REGION}"

    where ``{account}`` is the name of the AWS account hosting the deployment,
    e.g., ``"platform-hca-dev"``. As always, use the sandbox deployment's
    ``environment.py`` as a model when upgrading personal deployments.

    For main deployments, the update to ``AZUL_S3_BUCKET`` has already been
    made.

    Then deploy this fix.
    **Afterwards, manually delete the old storage bucket
    for the deployment.**

    Finally, verify that ::

        (cd terraform && make init && terraform state show aws_s3_bucket.storage)

    produces output that includes the following lines ::

        # aws_s3_bucket.storage:
        resource "aws_s3_bucket" "storage" {

Operator
~~~~~~~~

Follow the instructions in case A above for ``sandbox``, ``dev``,
``anvilbox``, and ``anvildev``. As part of the now deprecated upgrading steps
for #4646, the old storage buckets for these deployments should already have
been removed. Confirm that this is still the case.

Announce for other developers to upgrade their personal deployments.

When promoting this fix to ``prod``, follow the instructions in case C above.


#4646 Rename Azul storage buckets
=================================

This section has been deprecated. If you've already followed the steps
included here, please read the section for #4958 above.

|deprecated|

After these changes are successfully merged to ``develop``, manually delete the
old storage buckets for ``sandbox``, ``dev``, ``anvilbox``, and ``anvildev``.
Then announce for all other developers to follow the instructions in the
section below.

After these changes are successfully merged to ``prod``, manually delete the
old storage bucket for ``prod``.

Everyone
~~~~~~~~

For each of your personal deployments, change the value of ``AZUL_S3_BUCKET``
in ``environment.py`` to ::

    "edu-ucsc-gi-{account}-storage-{AZUL_DEPLOYMENT_STAGE}.{AWS_DEFAULT_REGION}"

where ``{account}`` is the name of the AWS account hosting the deployment,
e.g., ``"platform-hca-dev"``. As always, use the sandbox deployment's
``environment.py`` as a model when upgrading personal deployments.

After the changes are deployed to a given personal deployment, manually delete
the old storage bucket for that deployment.

|end_deprecated|


#4011 Integrate monitoring SNS topic with Slack
===============================================

Operator
~~~~~~~~

Before pushing a merge commit with these changes to a GitLab instance, `set up
AWS Chatbot <./README.md#313-aws-chatbot-integration-with-slack>`_ in the AWS
account hosting that instance. AWS Chatbot has already been set up in the
``platform-hca-dev`` account. Once AWS Chatbot is set up, manually deploy the
``shared`` component of the main deployment collocated with the GitLab instance
you will be pushing to.


#4673 Eliminate burner accounts
===============================

Operator
~~~~~~~~

Complete the steps in the next section. Then announce on ``#team-boardwalk``
for other developers to do the same.

Everyone
~~~~~~~~

When notified by the operator, complete the following steps:

#. Remove your burner account from the Google Cloud project:

   #. Go to the Google Cloud console, select the ``platform-hca-dev`` project,
      and navigate to ``IAM & Admin`` -> ``IAM``

   #. Select your burner; it includes the string "…ucsc.edu@gmail.com"

   #. Click ``REMOVE ACCESS`` -> ``CONFIRM``

#. Close your burner Google account:

   #. Sign in to Google using your burner email account. Click on the icon with
      your burner's name initial (upper right-hand corner of the page), click
      the ``Manage your Google Account`` button, and navigate to
      ``Data & Privacy``

   #. At the bottom of the page, under ``More options``, click on the
      ``Delete your Google Account`` button. Complete Google's confirmation
      steps and terminate your burner account by clicking on
      ``Delete Account``

#. Make sure to register your UCSC account with SAM as `described
   <./README.md#234-google-cloud-tdr-and-sam>`_ in the README.


#4907 CIS 2.6 (S3 access logging on CloudTrail bucket) still flagged in dev
===========================================================================

Operator
~~~~~~~~

Manually deploy the ``dev.shared`` component just before pushing the merge
commit to GitLab ``dev``.


#4880 Alarms for CIS recommendations treat missing data as OK
=============================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4832 Disable original CloudTrail trail
=======================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment. This
deployment is expected not to change any resources; everything should be
handled by the ``rename_resources`` script. Do not proceed with the deployment
if the plan shows any changes to the resources.


#4794 Ensure log metric filters and alarms exist for CIS recommendations
========================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4807 Move monitoring SNS topic to shared component
===================================================

Operator
~~~~~~~~

Manually deploy the ``gitlab`` component of any main deployment immediately
before the first time this change is pushed to the GitLab instance for that
main deployment, regardless of whether the changes come as part of a feature
branch, a merge commit, or a promotion. This is to ensure that the GitLab
instance has sufficient permissions to deploy these changes.

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment. Expect to
confirm the SNS subscription for each deployment while doing so.


#4792 Ensure S3 bucket access logging is enabled on the CloudTrail S3 bucket
============================================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4831 Move CloudTrail trail to default region
=============================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4764 Ensure security contact information is registered
=======================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4692 Ensure IAM password policies have strong configurations
=============================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4793 Create support role to manage incidents with AWS support
==============================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4196 Enable sending of CloudTrail events to CloudWatch logs
============================================================

Operator
~~~~~~~~

Manually deploy the ``shared`` component of any main deployment just before
pushing the merge commit to the GitLab instance in that deployment.


#4224 Eliminate personal service accounts
=========================================

When this PR lands in the main deployment in a given Google cloud project, the
operator should perform the following steps *in that project*, and then
announce for the other developers to do the same *in that project*.

#. Delete your personal Google service account:

   #. Go to the Google Cloud console, select the appropriate project, and
      navigate to ``IAM & Admin`` -> ``Service Accounts``

   #. Select your personal service account. This is the one where the part
      before the ``@`` symbol exactly matches your email address (it does not
      include the string "azul").

   #. Click ``DISABLE SERVICE ACCOUNT`` -> ``DISABLE``.

   #. Click ``DELETE SERVICE ACCOUNT`` -> ``DELETE``.

#. Delete the local file containing the private key of the service account
   that you deleted during step 1. Such files are usually stored in
   ``~/.gcp/``.

#. Remove the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable from
   ``environment.local.py`` for all Azul deployments (including non-personal
   deployments) where that variable references the key file that you deleted in
   step 2.

#. For clarity's sake, remove comments referencing the
   ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable from
   ``environment.py`` for all personal deployments that were changed during
   step 3. As always, use the sandbox deployment's ``environment.py`` as a
   model when upgrading personal deployments.


#4752 On replacement, Terraform creates ES domain before deleting it
====================================================================

Note: The ``apply`` and ``auto_apply`` targets in ``terraform/Makefile`` do not
recurse into the sibling ``lambdas`` directory anymore. The only way to get a
proper deployment is to run ``make deploy`` or ``make auto_deploy`` in the
project root. This change speeds up the ``apply`` and ``auto_apply`` targets
for those who know what they are doing™.

Note: The ``post_deploy`` target is gone. The ``deploy`` target has been
renamed to ``terraform``. The new ``deploy`` target depends on the
``terraform`` target and invokes the post-deploy scripts directly. The same
goes for ``auto_deploy`` and ``auto_terraform``, respectively.

Ensure that the ``comm`` utility is installed. The ``clean`` target in most
Makefiles depends on it.

This is a complicated change that involves renaming lots of resources, both in
TF config and in state. If a deployment is stale or borked, upgrading to this
change is just going to make things worse. Before upgrading any deployment to
this commit, or more precisely, the merge commit that introduces this change,
first check out the previous merge commit, and deploy while following any
upgrade instructions up to that commit. Then run ``make clean``, check out this
commit and run ``make deploy``.
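
Condensed, and with placeholders standing in for the actual commits, that
sequence is::

    git checkout <previous merge commit>
    # ... follow any upgrade instructions up to that commit ...
    make deploy
    make clean
    git checkout <merge commit for this change>
    make deploy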


#4688 Fix: Elasticsearch domains should be in a VPC
===================================================

Everyone
~~~~~~~~

Perform the steps listed below for all personal deployments that don't share an
ES domain with a shared deployment. The deletion of the ES domain will cascade
to many other resources that depend on it. Once the deletion is complete, it is
necessary to re-deploy the missing resources and perform a reindex to
repopulate the newly created ES domain::

    (cd terraform && make validate && terraform destroy -target aws_elasticsearch_domain.index)
    make deploy
    make reindex

Operator
~~~~~~~~

Before pushing the PR branch to ``sandbox`` or ``anvilbox``, notify the team
that personal deployments sharing the Elasticsearch domain with that deployment
will lose their indices.

For any shared deployment, perform the first of the above steps after the
GitLab ``deploy`` job fails in that deployment. Then retry the ``deploy`` job.
When that succeeds, start the ``reindex`` or ``early_reindex`` job.

When reindexing completes in the ``sandbox`` or ``anvilbox`` deployments,
request that team members re-deploy and reindex all personal deployments that
share the Elasticsearch domain with that deployment.


#4334 Upgrade Terraform CLI to 1.3.4
====================================

Before upgrading personal deployments, install Terraform 1.3.4 as `described
<./README.md#21-development-prerequisites>`_ in our README. Then run ``make
deploy``.


#4690 Fix: EC2 instances should use Instance Metadata Service Version 2 (IMDSv2)
================================================================================

Operator
~~~~~~~~

The steps below have already been performed on ``anvildev.gitlab``, but still
need to be run for ``dev.gitlab`` and ``prod.gitlab``::

    _select dev.gitlab
    make -C terraform/gitlab


#4691 Fix: S3 Block Public Access setting should be enabled at the bucket-level
================================================================================

This change blocks public access for all S3 buckets in the shared component and
in all deployments.

Everyone
~~~~~~~~

Run ``make deploy`` to update personal deployments as soon as you are notified
on Slack by the operator.

Operator
~~~~~~~~

Follow these steps to deploy for ``dev.shared``, ``anvildev.shared``, and
``prod.shared``::

    _select dev.shared
    make -C $project_root/terraform/shared apply


#4625 Disable URL shortener
===========================

Everyone
~~~~~~~~

In personal deployments, remove ``AZUL_URL_REDIRECT_BASE_DOMAIN_NAME`` and
``AZUL_URL_REDIRECT_FULL_DOMAIN_NAME``. As always, use the sandbox deployment's
``environment.py`` as a model when upgrading personal deployments.

Operator
~~~~~~~~

After this change lands in ``dev``, follow these instructions for the AWS
account ``platform-hca-dev``:

#. Ask everyone to upgrade their personal deployments in that account.

#. In the AWS console, navigate to *Route53 service* → *Hosted zones*.

#. Open the hosted zone ``dev.url.singlecell.gi.ucsc.edu`` and check for
   records of type ``CNAME``. If there are any, contact the owner of the
   corresponding deployment. Their deployment wasn't upgraded properly. As a
   last resort, remove the CNAME record. If there are records for the
   ``sandbox`` or ``dev`` deployments, contact the lead. Ultimately, there
   should only be SOA and NS records left.

#. Delete the hosted zone ``dev.url.singlecell.gi.ucsc.edu``.
#. Delete the hosted zone ``url.singlecell.gi.ucsc.edu``.

#. In the ``singlecell.gi.ucsc.edu`` zone, delete the record for
   ``url.singlecell.gi.ucsc.edu``.

After this change lands in ``anvildev``, follow these instructions for the AWS
account ``platform-anvil-dev``:

#. Ask everyone to bring their personal deployments in that account
   up to date with ``develop``.

#. In the AWS console, navigate to *Route53 service* → *Hosted zones*.

#. Select ``anvil.gi.ucsc.edu`` and check for records beginning with ``url.``.
   If there are any, contact the owner of the corresponding deployment. Their
   deployment wasn't upgraded properly. If there are records for the
   ``anvilbox`` or ``anvildev`` deployments, contact the lead. As a last
   resort, remove the record.

After completing the above two sections, ask the lead to deploy the
``dev.gitlab`` and ``anvildev.gitlab`` components. Nothing needs to be done
for ``prod.gitlab``.

After this change lands in ``prod``, follow these instructions for AWS account
``platform-hca-prod``:

#. In the AWS console, navigate to *Route53 service* → *Hosted zones*.

#. Open the hosted zone ``azul.data.humancellatlas.org`` and check for a
   record called ``url.azul.data.humancellatlas.org``. There should be none.
   If there is, contact the lead.

#. In the ``data.humancellatlas.org`` zone, delete the record for
   ``url.data.humancellatlas.org``.


#4648 Move GitLab ALB access logs to shared bucket
==================================================

A new bucket in the ``shared`` component will receive the GitLab ALB access
logs previously hosted in a dedicated bucket in the ``gitlab`` component. The
steps below have already been performed on ``dev`` and ``anvildev`` but need to
be run for ``prod`` before pushing the merge commit::

    _select prod.shared
    cd terraform/shared
    make
    cd ../gitlab
    _select prod.gitlab
    make

This will fail to destroy the non-empty bucket. Move the contents of the old
bucket to the new one::

    aws s3 sync s3://edu-ucsc-gi-singlecell-azul-gitlab-prod-us-east-1/logs/alb s3://edu-ucsc-gi-platform-hca-prod-logs.us-east-1/alb/access/prod/gitlab/
    aws s3 rm --recursive s3://edu-ucsc-gi-singlecell-azul-gitlab-prod-us-east-1/logs/alb
    make

If this fails with an error message about a non-empty state for an orphaned
bucket resource, the following will fix that::

    terraform state rm aws_s3_bucket.gitlab
    make


#4174 Enable GuardDuty and SecurityHub
======================================

This change enables the AWS Config, GuardDuty, and SecurityHub services,
deployed as part of the ``shared`` Terraform component. Prior to deploying, the
operator must ensure these services are currently not active and disable or
remove any that are.
Use the AWS CLI's *list* and *describe* functionality to obtain the status of
each service, and the CLI's *delete* and *disable* functionality to remove the
ones that are active::

    _select dev.shared

    aws configservice describe-configuration-recorders
    aws configservice delete-configuration-recorder --configuration-recorder-name

    aws configservice describe-delivery-channels
    aws configservice delete-delivery-channel --delivery-channel-name

    aws guardduty list-detectors
    aws guardduty delete-detector --detector-id

    aws securityhub get-enabled-standards
    aws securityhub batch-disable-standards --standards-subscription-arns

    aws securityhub describe-hub
    aws securityhub disable-security-hub

After ensuring the services are disabled, follow these steps to deploy for the
``dev.shared``, ``anvildev.shared``, and ``prod.shared`` deployments::

    _select dev.shared
    cd $project_root/terraform/shared
    make apply


#4190 Create SNS topic for monitoring and security notifications
================================================================

A new environment variable called ``AZUL_MONITORING_EMAIL`` has been added. In
personal deployments, set this variable to ``'{AZUL_OWNER}'``. As always, use
the sandbox deployment's ``environment.py`` as a model when upgrading personal
deployments.

Note: The SNS topic and email subscription will only be created for deployments
that have ``AZUL_ENABLE_MONITORING`` enabled, which is typically the case in
main deployments only.

**IMPORTANT**: The SNS topic subscription will be created with a status of
"pending confirmation". Instead of simply clicking the link in the
"Subscription Confirmation" email, you should follow the instructions given
during the ``make deploy`` process to confirm the subscription.


#4122 Create AnVIL deployments of Azul and Data Browser
=======================================================

Everyone
~~~~~~~~

In personal deployments dedicated to AnVIL, set ``AZUL_BILLING`` to
``'anvil'``; set it to ``'hca'`` in all other personal deployments.

In personal deployments, set ``AZUL_VERSIONED_BUCKET`` and ``AZUL_S3_BUCKET``
to the same value as in the ``sandbox`` deployment.

In personal deployments, remove ``AZUL_URL_REDIRECT_FULL_DOMAIN_NAME`` if its
value is ``'{AZUL_DEPLOYMENT_STAGE}.{AZUL_URL_REDIRECT_BASE_DOMAIN_NAME}'``.

In ``environment.py`` for personal deployments, initialize the ``is_sandbox``
variable to ``False``, replacing the dynamic initializer, and copy the
definition of the ``AZUL_IS_SANDBOX`` environment variable from the sandbox's
``environment.py``. This will make it easier in the future to synchronize your
deployments' ``environment.py`` with that of the sandbox.

Operator
~~~~~~~~

Run ::

    _select dev.shared  # or prod.shared
    cd terraform/shared
    make validate
    terraform import aws_s3_bucket.versioned $AZUL_VERSIONED_BUCKET
    terraform import aws_s3_bucket_versioning.versioned $AZUL_VERSIONED_BUCKET
    terraform import aws_s3_bucket_lifecycle_configuration.versioned $AZUL_VERSIONED_BUCKET
    terraform import aws_api_gateway_account.shared api-gateway-account
    terraform import aws_iam_role.api_gateway azul-api_gateway

Repeat for ``prod.shared``.

Redeploy the ``dev.shared``, ``dev.gitlab``, ``prod.shared``, and
``prod.gitlab`` components to apply the needed changes to any resources.
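
A sketch of those four redeploys, reusing the targets that appear elsewhere in
this guide::

    _select dev.shared
    make -C terraform/shared apply
    _select dev.gitlab
    make -C terraform/gitlab
    _select prod.shared
    make -C terraform/shared apply
    _select prod.gitlab
    make -C terraform/gitlab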


#4224 Index ENCODE snapshot as PoC
==================================

Replace ``'tdr'`` with ``'tdr_hca'`` in the repository plugin configuration for
the ``AZUL_CATALOGS`` variable in your personal deployments. As always, use the
sandbox deployment's ``environment.py`` as a model when upgrading personal
deployments.


#4197 Manage CloudTrail trail in 'shared' TF component
======================================================

This change adds a ``shared`` Terraform component to allow Terraform to manage
the existing CloudTrail resources on ``develop`` and ``prod``. To import these
resources into Terraform, the operator must run the following steps after the
change has been merged into the respective branches.

For ``develop``::

    git checkout develop
    _select dev.shared
    cd $project_root/terraform/shared
    make config
    terraform import aws_s3_bucket.cloudtrail_shared "edu-ucsc-gi-platform-hca-dev-cloudtrail"
    terraform import aws_s3_bucket_policy.cloudtrail_shared "edu-ucsc-gi-platform-hca-dev-cloudtrail"
    aws cloudtrail delete-trail --name Default
    make apply

For ``prod``::

    git checkout prod
    _select prod.shared
    cd $project_root/terraform/shared
    make config
    terraform import aws_s3_bucket.cloudtrail_shared "edu-ucsc-gi-platform-hca-prod-cloudtrail"
    terraform import aws_s3_bucket_policy.cloudtrail_shared "edu-ucsc-gi-platform-hca-prod-cloudtrail"
    aws cloudtrail delete-trail --name platform-hca-cloudtrail
    make apply


#4001 Put API Gateway behind GitLab VPC
=======================================

A new configuration variable has been added, ``AZUL_PRIVATE_API``. Set this
variable's value to ``1`` to place the deployment's API Gateway in the
GitLab VPC, thus requiring use of a VPN connection to access the deployment.

Note that when changing the variable's value from ``0`` to ``1`` or vice versa,
the deployment must first be destroyed (``make -C terraform destroy``), and
``AZUL_DEPLOYMENT_INCARNATION`` incremented before the change can be deployed.
Refer to the *Private API* section of the README for more information.


#4170 Update Python to 3.9.x
============================

Update your local Python installation to 3.9.12. In your working copy, run
``make virtualenv`` and ``make requirements envhook``.

Reconcile the import section in your personal deployments' ``environment.py``
with that in the sandbox's copy of that file. Some of the imports from the
``typing`` module have been removed or replaced with imports from other
modules, like ``collections.abc``.


#3530 Remove AZUL_PARTITION_PREFIX_LENGTH
=========================================

The environment variable ``AZUL_PARTITION_PREFIX_LENGTH`` has been removed.
Ensure that all configured sources specify their own partition prefix length.
As always, use the sandbox deployment's ``environment.py`` as a model when
upgrading personal deployments.


#4048 Remove JsonObject
=======================

Run ``make clean`` to remove any left-over unpacked wheel distributions.

Run ``pip uninstall jsonobject`` to uninstall JsonObject. If that gives you
trouble, run ::

    deactivate ; make virtualenv && source .venv/bin/activate && make requirements envhook

instead.


#3073 Move parsing of prefix to SourceSpec
==========================================

The ``AZUL_DSS_ENDPOINT`` environment variable has been replaced with
``AZUL_DSS_SOURCE``.
-``environment.py`` file for the updated EBNF syntax.
-
-
-#3605 Place GitLab behind VPN
-=============================
-
-Follow the instructions in the README on `requesting VPN access to GitLab`_ for
-both ``dev.gitlab`` and ``prod.gitlab``.
-
-.. _requesting VPN access to GitLab: ./README.md#911-requesting-access
-
-Upgrade to Terraform 0.12.31 and run ``make deploy`` in every personal
-deployment.
-
-
-#3796 Fix: Can't easily override AZUL_DEBUG for all deployments locally
-=======================================================================
-
-This changes the precedence of ``environment.py`` and ``environment.local.py``
-files. Previously, the precedence was as follows (from high to low, with
-``dev.gitlab`` selected as an example):
-
-1) deployments/dev.gitlab/environment.local.py
-2) deployments/dev.gitlab/environment.py
-3) deployments/dev/environment.local.py
-4) deployments/dev/environment.py
-5) environment.local.py
-6) environment.py
-
-The new order of precedence is:
-
-1) deployments/dev.gitlab/environment.local.py
-2) deployments/dev/environment.local.py
-3) environment.local.py
-4) deployments/dev.gitlab/environment.py
-5) deployments/dev/environment.py
-6) environment.py
-
-Before this change, it wasn't possible to override, say, ``AZUL_DEBUG`` for all
-deployments using an ``environment.local.py`` in the project root because the
-setting of that variable in ``deployments/*/environment.py`` would have taken
-precedence. One would have had to specify an override in every
-``deployments/*/environment.local.py``.
-
-You may need to adjust your personal deployment's ``environment.py`` file
-and/or any ``environment.local.py`` you may have created.
-
-
-#3006 Upgrade to Elasticsearch 7.10
-===================================
-
-This will destroy and recreate the ES domain for all main deployments, including
-``sandbox``, which hosts the ES indices for typical personal deployments. If
-your personal deployment shares the ES instance with the ``sandbox`` deployment,
-you will need to run ``make reindex`` to repopulate your indices on the new ES
-domain. In the uncommon case that your personal deployment uses its own ES
-domain, update ``AZUL_ES_INSTANCE_TYPE`` and ``AZUL_ES_VOLUME_SIZE`` to be
-consistent with what the ``sandbox`` deployment uses. Then run ``make deploy``
-and ``make reindex``.
-
-For main deployments, the operator needs to manually delete the deployment's
-existing Elasticsearch domain before initiating the GitLab build.
-
-
-#3561 Fix: Listing bundles for a snapshot gives zero bundles
-============================================================
-
-The definition of the ``mksrc`` function and the source configuration for the
-``dcp2`` catalog have been updated. As always, use the sandbox deployment's
-``environment.py`` as a model when upgrading personal deployments.
-
-
-#3113 IT catalog names are inconsistent
-=======================================
-
-The format of IT catalog names has been updated. IT catalog names are composed
-by appending ``-it`` to a primary catalog name (e.g., ``dcp2`` and ``dcp2-it``).
-The regular expression that validates an IT catalog name can be found at
-``azul.Config.Catalog._it_catalog_re``. As always, use the sandbox deployment's
-``environment.py`` as a model when upgrading personal deployments.
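-
-The sketch below mirrors that naming rule. It is illustrative only: the
-authoritative pattern is the one at ``azul.Config.Catalog._it_catalog_re``,
-and the assumption that primary catalog names consist of lower-case letters
-and digits is ours ::
-
-    import re
-
-    # Illustrative stand-in for azul.Config.Catalog._it_catalog_re
-    it_catalog_re = re.compile(r'[a-z0-9]+-it')
-
-    assert it_catalog_re.fullmatch('dcp2-it') is not None
-    assert it_catalog_re.fullmatch('dcp2') is None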
-
-
-#3515 Reduce number of shards for IT catalogs
-=============================================
-
-The configuration will take effect in the next IT run after deleting the old
-indices. To delete them, run::
-
-    python scripts/reindex.py --catalogs it it2 --delete --index
-
-
-#3439 Upgrade Python runtime to 3.8.12
-======================================
-
-Update Python to 3.8.12.
-
-
-#3552 Index updated snapshot into dcp2 on dev
-=============================================
-
-A snapshot was updated in ``dcp2_sources``. As always, use the sandbox
-deployment's ``environment.py`` as a model when upgrading personal deployments.
-
-
-#3114 Define sources within catalog JSON
-========================================
-
-The ``AZUL_TDR_SOURCES`` and ``AZUL_…_SOURCES`` environment variables have been
-removed. Sources must be defined within the catalog configuration as a list of
-sources. As always, use the sandbox deployment's ``environment.py`` as a model
-when upgrading personal deployments.
-
-
-HumanCellAtlas/dcp2#17 TDR dev dataset is stale
-===============================================
-
-Before upgrading to this commit, run::
-
-    python scripts/reindex.py --delete --catalogs dcp2ebi it2ebi lungmap it3lungmap
-
-
-#3196 Cover can_bundle.py in integration tests
-==============================================
-
-Follow the instructions in section 2.3.1 of the README.
-
-
-#3448 Make BQ slot location configurable
-========================================
-
-A new configuration variable has been added, ``AZUL_TDR_SOURCE_LOCATION``.
-Set the variable to the storage location of the snapshots the deployment is
-configured to index. Concurrently indexing snapshots with inconsistent locations
-is no longer supported. As always, use the sandbox deployment's
-``environment.py`` as a model when upgrading personal deployments.
-
-
-#2750 Add partition_prefix_length to sources
-============================================
-
-The syntax of the ``AZUL_TDR_SOURCES`` and ``AZUL_TDR_…_SOURCES`` environment
-variables was modified to include a partition prefix length. To specify a
-partition prefix length within a source, append a slash delimiter ``/`` followed
-by the prefix length (e.g., ``/2``) to the source entry in the deployment's
-``environment.py``. If the partition prefix length is not specified in one of
-the above variables, the default value from ``AZUL_PARTITION_PREFIX_LENGTH``
-will be used. As always, use the sandbox deployment's ``environment.py`` as a
-template.
-
-
-#2865 Allow catalog.internal to be configurable
-===============================================
-
-The definition of the ``AZUL_CATALOGS`` environment variable now requires
-the ``internal`` property. All IT catalogs must have the ``internal`` property
-set to ``True``, while for non-IT catalogs it must be set to ``False``. As
-always, use the sandbox deployment's ``environment.py`` as a model when
-upgrading personal deployments.
-
-
-#2495 Convert AZUL_CATALOGS to JSON
-===================================
-
-The definition of the ``AZUL_CATALOGS`` environment variable has been changed to
-contain a JSON string. Personal deployments must be upgraded to reflect this
-change in format. For details, refer to the specification within the
-``environment.py`` file in the project root. As always, use the sandbox
-deployment's ``environment.py`` as a model when upgrading personal deployments.
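-
-As a rough illustration only (the authoritative schema is specified in the root
-``environment.py``, and the exact set of properties has evolved since, e.g.
-with the ``internal`` property introduced above), a catalog entry might be
-assembled like this ::
-
-    import json
-
-    # Property names follow later revisions of the sandbox environment.py
-    # and may not match this change exactly.
-    environment = {
-        'AZUL_CATALOGS': json.dumps({
-            'dcp2': dict(atlas='hca',
-                         plugins=dict(metadata=dict(name='hca'),
-                                      repository=dict(name='tdr')))
-        })
-    }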
-
-
-#3137 Increase lambda concurrency and BigQuery slots in prod
-============================================================
-
-If you set the variable `AZUL_INDEXER_CONCURRENCY` in your personal deployment,
-replace the setting with two separate settings for
-`AZUL_CONTRIBUTION_CONCURRENCY` and `AZUL_AGGREGATION_CONCURRENCY`. Also note
-that you can now set different concurrencies for the retry lambdas.
-
-
-#3080 Provision separate OAuth Client IDs for lower deployments
-================================================================
-
-1. Follow the instructions in section 3.2.2 of the README. For step 8, replace
-   the previously configured Client ID with the one you just created in your
-   `environment.py` file.
-
-2. From the hca-dev Google Cloud console, navigate to *APIs & Services* ->
-   *Credentials*
-
-3. Select the `azul-dev` Client ID and click the pencil icon to edit
-
-4. Delete the URLs corresponding to your deployment under
-   *Authorized JavaScript origins* and *Authorized redirect URIs*
-
-5. Click *SAVE*
-
-6. `_refresh`
-
-
-#2978 Use public snapshots for unauthenticated service requests
-===============================================================
-
-A second Google service account, ``AZUL_GOOGLE_SERVICE_ACCOUNT_PUBLIC``, has
-been added and needs to be registered and authorized with SAM. Run `_refresh`
-and `make deploy` to create the service account and register it with SAM.
-
-You can obtain the full email address of the public service account by
-running ::
-
-    python3 -c 'from azul.terra import TDRClient; print(TDRClient.with_public_service_account_credentials().credentials.service_account_email)'
-
-This email must then be manually added to the group `azul-public-dev` by a team
-member with administrator access (currently Hannes or Noah).
-
-
-#2951 Add OAuth 2.0 authentication and log user IDs
-===================================================
-
-Follow the instructions in section 3.2.2 of the README.
-
-
-#2650 Add prefix to sources
-===========================
-
-Remove the ``azul_dss_query_prefix`` variable from any ``environment.py``
-files for personal deployments in which ``AZUL_DSS_ENDPOINT`` is set to
-``None``. For personal deployments in which that is not the case, rename the
-variable to ``AZUL_DSS_QUERY_PREFIX``.
-
-The syntax of the ``AZUL_TDR_SOURCES`` and ``AZUL_TDR_…_SOURCES`` environment
-variables was modified to include a UUID prefix. To upgrade a deployment,
-append to every source entry in the deployment's ``environment.py`` a colon
-delimiter ``:`` followed by a valid hexadecimal prefix, e.g. ``:42``. For IT
-catalogs within a personal deployment, set the source prefix to an empty
-string. Failure to do so may cause IT errors. As always, use the sandbox
-deployment's ``environment.py`` as a template.
-
-
-#2950 Move auth and cart service to attic
-=========================================
-
-1. Before upgrading to this commit, run ::
-
-    source environment
-    _select foo
-    (cd terraform && make validate && terraform destroy \
-        -target=module.chalice_service.aws_api_gateway_rest_api.rest_api \
-        -target=module.chalice_service.aws_api_gateway_deployment.rest_api )
-
-2. Upgrade to this commit or a later one and run ::
-
-    _refresh
-    make deploy
-
-
-#2755 Change AZUL_TDR_SOURCE to AZUL_TDR_SOURCES
-================================================
-
-Rename ``AZUL_TDR_SOURCE`` to ``AZUL_TDR_SOURCES`` and ``AZUL_TDR_…_SOURCE`` to
-``AZUL_TDR_…_SOURCES``. Wrap the value of these entries in ``','.join([…,])``.
-Yes, that is a trailing comma after the entry, diverging from our guidelines,
-but these entries will soon have multiple items and we want to start minimizing
-the diffs from the outset. If you have multiple ``AZUL_TDR_…_SOURCES`` entries
-of the same value, consider interpolating a dictionary comprehension to
-eliminate the duplication. As always, use the sandbox deployment's
-``environment.py`` as a template.
-
-
-#2399 Reduce portal DB IT concurrency
-=====================================
-
-Reset the integrations portal database to its default state to ensure that no
-pollution persists from previous IT failures ::
-
-    python3 scripts/reset_portal_db.py
-
-
-#2066 Add means for determining which catalogs are available
-============================================================
-
-The syntax of the value of the ``AZUL_CATALOGS`` environment variable was
-modified to include an atlas name. In the future, catalogs from other atlases
-will be added, but at the moment all catalogs belong to the HCA atlas. To
-upgrade a deployment, prefix every catalog entry in that variable with
-``hca:``.
-
-
-#2445 Example deployment is stale
-=================================
-
-This change does not modify any environment variables; it just streamlines
-where and how they are set. Personal deployments most resemble the sandbox, so
-it makes sense to use the sandbox as a template instead of a dedicated example
-deployment.
-
-1. Remove all ``environment.local`` files you may have lying around in your
-   working copy. This commit removes the ``.gitignore`` rule for them so they
-   should show up as new files. Before deleting such a file, check if you want
-   to port any settings from it to the corresponding ``environment.local.py``.
-
-2. Synchronize ``deployments/sandbox/environment.py`` with the corresponding
-   file in each of your personal deployments. You want the personal
-   deployment's file to look structurally the same as the one for the sandbox
-   while retaining any meaningful differences between your personal
-   deployment and the sandbox. This will make it easier in the future to keep
-   your personal deployment up to date with the sandbox. I used PyCharm's
-   diff editor for this but you could also copy the sandbox files and apply
-   any differences as if it were the first time you created the deployment.
-
-3. Check your ``environment.local.py`` files for redundant or misplaced
-   variables. Use the corresponding ``.example.environment.local.py`` files as
-   a guide.
-
-
-#2494 Move lower deployments to ``platform-hca-dev``
-====================================================
-
-1. Before upgrading to this commit, run ::
-
-    source environment
-    _select yourname.local
-    _preauth
-    ( cd terraform && make validate && terraform destroy \
-        -target google_service_account.azul \
-        -target google_project_iam_custom_role.azul \
-        -target google_project_iam_member.azul )
-
-2. Upgrade to this commit or a later one
-
-3. Make sure that your individual Google account and your burner account are
-   owners of the Google project ``platform-hca-dev``. Create a personal service
-   account and obtain its private key. Be sure to set the environment variable
-   ``GOOGLE_APPLICATION_CREDENTIALS`` to the new key.
-
-4. Ask to have your burner added as an admin of the ``azul-dev`` SAM group
-   (`README sections 2.3.2 and 2.3.3`_).
-
-5. For your personal deployment, set ``GOOGLE_PROJECT`` to ``platform-hca-dev``
-   and run ::
-
-    _refresh && _preauth
-    make package deploy
-
-6. When that fails to verify TDR access (it should, and the error message will
-   contain the service account name), add your personal deployment's service
-   account to the ``azul-dev`` SAM group (`README sections 2.3.2 and 2.3.3`_)
-   and run ``make deploy`` again.
-
-.. _README sections 2.3.2 and 2.3.3: ./README.md#232-google-cloud-credentials
-
-
-#2658 Disable DSS plugin in all deployments
-===========================================
-
-In your personal deployment configuration,
-
-* Remove any ``AZUL_CATALOGS`` entries that contain ``repository/dss``
-
-* Unset any environment variables starting with ``AZUL_DSS_``
-
-Use the `sandbox` deployment's configuration as a guide.
-
-
-#2246 Add deployment incarnation counter
-========================================
-
-See instructions for #2143 below.
-
-
-#2143 Merge service accounts for indexer and service
-====================================================
-
-1. Before upgrading to this commit, run ::
-
-    source environment
-    _select foo
-    _preauth
-    (cd terraform && make validate && terraform destroy -target=google_service_account.indexer)
-
-
-2. Upgrade to this commit or a later one and run ::
-
-    _refresh
-    _preauth
-    make package deploy
-
-3. If this fails (as it should) with
-
-    azul.RequirementError: Google service account
-    azul-ucsc-0-foo@human-cell-atlas-travis-test.iam.gserviceaccount.com is
-    not authorized to access the TDR BigQuery tables. Make sure that the SA
-    is registered with SAM and has been granted repository read access for
-    datasets and snapshots.
-
-   let someone who can administer the SAM group that controls access to TDR
-   know of the renamed service account via Slack. The administrator will need
-   to replace the old service account email with the new one. For example,
-   ask them to replace
-
-   ``azul-ucsc-indexer-foo@human-cell-atlas-travis-test.iam.gserviceaccount.com``
-
-   with
-
-   ``azul-ucsc-0-foo@human-cell-atlas-travis-test.iam.gserviceaccount.com``
-
-4. Run ::
-
-    make -C terraform sam
-
-   which should now succeed.
-
-
-#2332 Version of pip used by build ignores wheel for gevent
-===========================================================
-
-Run ``make requirements``.
-
-
-#1921 Incorporate symlink fix from Chalice upstream
-===================================================
-
-Run ``make requirements``.
-
-
-#2318 Switch dcp2 catalog to optimusb snapshot
-==============================================
-
-Update ``$AZUL_TDR_SOURCE`` in personal deployments.
-
-
-#1764 Adapt /dss/files proxy to work with v2 and TDR
-====================================================
-
-Run ``make requirements``.
-
-
-#1398 Locust script is stale
-============================
-
-Run ``pip uninstall locustio && make requirements``.
-
-
-#2313 Fix `make requirements_update` assertion failure
-======================================================
-
-Run ``make requirements``.
-
-
-#2269 Fix: `make requirements_update` fails in `sed` on macOS
-=============================================================
-
-Run ``make requirements``.
-
-
-#2261 Fix: `make requirements_update` may use stale docker image layer
-======================================================================
-
-Run ``make requirements``.
-
-
-#2149 Update DCP2 catalog to `hca_ucsc_files___20200909` snapshot
-=================================================================
-
-Change ``AZUL_TDR_SOURCE`` in personal deployments to refer to the snapshot
-mentioned in the title above.
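-
-For example, the corresponding entry in the deployment's ``environment.py``
-would change to something like the sketch below. The source specification
-syntax is deliberately elided with ``…`` because it has changed several times
-since; only the placement of the snapshot name is the point here ::
-
-    # Sketch only: 'tdr:…:' stands in for the source syntax of the time.
-    environment = {
-        'AZUL_TDR_SOURCE': 'tdr:…:hca_ucsc_files___20200909',
-    }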
-
-
-#2025 Register indexer SA with Broad's SAM during deployment
-============================================================
-
-This PR introduces two new deployment-specific environment variables,
-``AZUL_TDR_SERVICE_URL`` and ``AZUL_SAM_SERVICE_URL``. Copy the settings for
-these variables from the example deployment to your personal deployment.
-
-Service accounts must be registered and authorized with SAM for integration
-tests to pass. See `section 3.2.1`_ of the README for registration instructions.
-
-.. _section 3.2.1: ./README.md#321-tdr-and-sam
-
-
-#2069 Upgrade PyJWT to 1.7.1
-============================
-
-The PyJWT dependency has been pinned from v1.6.4 to v1.7.1. Update by running
-``make requirements``.
-
-
-#2112 Upgrade Chalice version to 1.14.0+5
-=========================================
-
-The Chalice dependency was updated. Run ::
-
-    make requirements
-
-
-#2149 Switch to TDR snapshot hca_dev_20200817_dssPrimaryOnly
-============================================================
-
-Change ``AZUL_TDR_SOURCE`` in personal deployments to refer to the snapshot
-mentioned in the title above.
-
-
-#2071 Separate ES domain for sandbox and personal deployments
-=============================================================
-
-1. Before upgrading to this commit, and for every one of your personal
-   deployments, run ::
-
-    python scripts/reindex.py --delete --catalogs it1 it2 dcp1 dcp2
-
-   to delete any indices that deployment may have used on the ``dev`` ES domain.
-
-2. Upgrade to this commit or a later one.
-
-3. For each personal deployment:
-
-   a. Configure it to share an ES domain with the sandbox deployment. See the
-      example deployment for details.
-
-   b. Run ``make package``
-
-   c. Run ``make deploy``
-
-   d. Run ``make create``
-
-   e. Run ``make reindex``
-
-
-#2015 Change DRS URLs to Broad resolver
-=======================================
-
-Rename `AZUL_TDR_TARGET` to `AZUL_TDR_SOURCE` in `environment.py` files for
-personal deployments.
-
-
-#2025 Register indexer SA with Broad's SAM during deployment
-============================================================
-
-This PR introduces two new deployment-specific environment variables,
-``AZUL_TDR_SERVICE_URL`` and ``AZUL_SAM_SERVICE_URL``. Copy the settings for
-these variables from the sandbox deployment to your personal deployment.
-
-
-#2011 Always provision indexer service account
-==============================================
-
-The indexer service account is now provisioned even if ``AZUL_SUBSCRIBE_TO_DSS``
-is ``0``. Make sure that ``GOOGLE_APPLICATION_CREDENTIALS`` is set in
-``environment.local.py`` for all deployments that you use.
-
-
-#1644 Replace `azul_home` with `project_root`
-=============================================
-
-Replace references to ``azul_home`` with ``project_root`` in personal deployment
-files (``environment.local.py`` and
-``deployments/*.local/environment{,.local}.py``).
-
-
-#1719 Upgrade Elasticsearch version to 6.8
-==========================================
-
-The personal deployments that share an ES domain with ``dev`` need to be
-redeployed and reindexed::
-
-    make package
-    make deploy
-    make reindex
-
-
-#1770 Move `json-object` wheel from lambda packages to layer package
-====================================================================
-
-Run ::
-
-    rm -r lambdas/service/vendor/jsonobject* lambdas/indexer/vendor/jsonobject*
-
-to ensure ``json-object`` is only deployed via the dependencies layer.
-
-
-#1673 Ensure Lambda package hash is deterministic
-=================================================
-
-#. If you haven't yet, install Python 3.8.
-
-#. Recreate your virtual environment::
-
-    make virtualenv
-    make requirements
-    make envhook # if you use PyCharm
-
-#. If you use PyCharm, update your interpreter settings by going to
-   ``Settings > Project: azul > Project Interpreter``. From the drop down,
-   select ``Show All``. Use the minus sign to remove the Python 3.6 entry
-   at ``azul/.venv/bin/python``. Then use the plus sign to add the newly
-   generated Python 3.8 interpreter, located at the same path as the one you
-   just removed.
-
-
-#1645 Rethink template config variable mechanism
-================================================
-
-The format of the ``AZUL_SUBDOMAIN_TEMPLATE`` environment variable has been
-changed and will need to be updated in each personal deployment's
-``environment.py`` file.
-
-Change ::
-
-    'AZUL_SUBDOMAIN_TEMPLATE': '{{lambda_name}}.{AZUL_DEPLOYMENT_STAGE}',
-
-to ::
-
-    'AZUL_SUBDOMAIN_TEMPLATE': '*.{AZUL_DEPLOYMENT_STAGE}',
-
-
-#1272 Use Lambda layers to speed up ``make deploy``
-===================================================
-
-Upgrading with these changes should work as expected.
-
-If downgrading, however, you may encounter a Terraform cycle. This can be
-resolved by running ::
-
-    cd terraform
-    make init
-    terraform destroy -target aws_lambda_layer_version.dependencies_layer
-
-
-#1577 Switch all deployments to DSS ``prod``
-============================================
-
-Please switch your personal deployments to point at the production instance of
-the DSS. See the example configuration files in ``deployments/.example.local``
-for the necessary configuration changes.
-
-
-#556 Deploying lambdas with Terraform
-=====================================
-
-To deploy lambdas with Terraform you will need to remove the currently deployed
-lambda resources using Chalice. Check out the most recent commit *before* these
-changes and run ::
-
-    cd terraform
-    make init
-    terraform destroy $(terraform state list | grep aws_api_gateway_base_path_mapping | sed 's/^/-target /')
-    cd ..
-    make -C lambdas delete
-
-If the last command fails with a TooManyRequests error, wait a minute and rerun
-it.
-
-Switch back to your branch that includes these changes. Now use Chalice to
-generate the new Terraform config. Run ::
-
-    make deploy
-
-And finally ::
-
-    make terraform
-
-In the unlikely case that you need to downgrade, perform the steps below.
-
-Switch to the branch you want to deploy. Run ::
-
-    cd terraform
-    rm -r indexer/ service/
-    make init
-    terraform destroy $(terraform state list | grep aws_api_gateway_base_path_mapping | sed 's/^/-target /')
-    cd ..
-    make terraform
-
-This will remove the Lambda resources provisioned by Terraform. Now run ::
-
-    make deploy
-
-to set up the Lambdas again, and finally ::
-
-    make terraform
-
-to complete the API Gateway domain mappings, etc.
-
-Run ::
-
-    make deploy
-
-a final time to work around a bug with OpenAPI spec generation.
-
-
-#1637 Refactor handling of environment for easier reuse
-=======================================================
-
-1. Run ::
-
-    python scripts/convert_environment.py deployments/foo.local/environment{,.local}
-
-   where ``foo.local`` is the name of your personal deployment. This should
-   create ``environment.py`` and possibly ``environment.local.py`` with
-   essentially the same settings, but in Python syntax.
-
-2. Close the shell, start a new one, and activate your venv
-
-3.
Run ``source environment`` - -4. Run ``_select foo.local`` - -5. If you use ``envhook.py`` - - i) Reinstall it :: - - python scripts/envhook.py remove - python scripts/envhook.py install - - ii) Confirm that PyCharm picks up the new files via ``envhook.py`` by starting a Python console inside PyCharm or - running a unit test - - iii) Confirm that running ``python`` from a shell picks up the new files via - ``envhook.py`` - -6. Confirm that ``make deploy`` and ``make terraform`` still work diff --git a/deployments/anvilbox/.example.environment.local.py b/deployments/anvilbox/.example.environment.local.py deleted file mode 120000 index 31145230db..0000000000 --- a/deployments/anvilbox/.example.environment.local.py +++ /dev/null @@ -1 +0,0 @@ -../sandbox/.example.environment.local.py \ No newline at end of file diff --git a/deployments/anvilbox/environment.py b/deployments/anvilbox/environment.py deleted file mode 100644 index 83a5ce909b..0000000000 --- a/deployments/anvilbox/environment.py +++ /dev/null @@ -1,173 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Literal, - Optional, -) - -is_sandbox = True - -pop = 1 # remove snapshot - - -def bqsrc(google_project: str, - snapshot: str, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - assert len(google_project) == 8, google_project - project = 'datarepo-dev-' + google_project - assert not snapshot.startswith('ANVIL_'), snapshot - snapshot = 'ANVIL_' + snapshot - return mksrc('bigquery', project, snapshot, flags, prefix) - - -def mksrc(source_type: Literal['bigquery', 'parquet'], - google_project, - snapshot, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - project = '_'.join(snapshot.split('_')[1:-3]) - assert flags <= pop - source = None if flags & pop else ':'.join([ - 'tdr', - source_type, - 'gcp', - google_project, - snapshot, - prefix - ]) - return project, source - - -def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]: - result = dict(items) - assert len(items) == len(result), 'collisions detected' - assert list(result.keys()) == sorted(result.keys()), 'input not sorted' - return result - - -def mklist(catalog: dict[str, str]) -> list[str]: - return list(filter(None, catalog.values())) - - -def mkdict(previous_catalog: dict[str, str], - num_expected: int, - delta: dict[str, str] - ) -> dict[str, str]: - catalog = previous_catalog | delta - num_actual = len(mklist(catalog)) - assert num_expected == num_actual, (num_expected, num_actual) - return catalog - - -anvil_sources = mkdict({}, 3, mkdelta([ - bqsrc('e53e74aa', '1000G_2019_Dev_20230609_ANV5_202306121732'), - bqsrc('42c70e6a', 'CCDG_Sample_1_20230228_ANV5_202302281520'), - bqsrc('97ad270b', 'CMG_Sample_1_20230225_ANV5_202302281509') -])) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. 
They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - # Set variables for the `anvilbox` deployment here. The anvilbox is used - # to run integration tests against PRs and to perform CI/CD experiments. - # - # You can use this file as a template for a personal deployment. Look - # for conditionals using the `is_sandbox` variable and adjust the `else` - # branch accordingly. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. Settings that are applicable to all environments but - # specific to you go into `environment.local.py` at the project root. - - # When using this file as a template for a personal deployment, replace - # `None` with a short string that is specific to YOU. - # - 'AZUL_DEPLOYMENT_STAGE': 'anvilbox' if is_sandbox else None, - - # This deployment uses a subdomain of the `anvildev` deployment's - # domain. - # - 'AZUL_DOMAIN_NAME': 'anvil.gi.ucsc.edu', - 'AZUL_SUBDOMAIN_TEMPLATE': '*.{AZUL_DEPLOYMENT_STAGE}', - - 'AZUL_CATALOGS': json.dumps({ - f'{catalog}{suffix}': dict(atlas=atlas, - internal=internal, - plugins=dict(metadata=dict(name='anvil'), - repository=dict(name='tdr_anvil')), - sources=list(filter(None, sources.values()))) - for atlas, catalog, sources in [ - ('anvil', 'anvil', anvil_sources), - ] - for suffix, internal in [ - ('', False), - ('-it', True) - ] - }), - - 'AZUL_TDR_SOURCE_LOCATION': 'us-central1', - 'AZUL_TDR_SERVICE_URL': 'https://jade.datarepo-dev.broadinstitute.org', - 'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-dev.broadinstitute.org', - 'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-dev.broadinstitute.org', - 'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-dev.broadinstitute.org', - - **( - { - 'AZUL_ES_INSTANCE_TYPE': 'r6gd.large.search', - 'AZUL_ES_INSTANCE_COUNT': '2', - } if is_sandbox else { - # Personal deployments share an ES domain with `anvilbox` - 'AZUL_SHARE_ES_DOMAIN': '1', - 'AZUL_ES_DOMAIN': 'azul-index-anvilbox', - # Personal deployments use fewer Lambda invocations in parallel. - 'AZUL_CONTRIBUTION_CONCURRENCY': '8', - 'AZUL_AGGREGATION_CONCURRENCY': '8', - } - ), - - 'AZUL_DEBUG': '1', - - 'AZUL_BILLING': 'anvil', - - # When using this file as a template for a personal deployment, change - # `None` to a string containing YOUR email address. - # - 'AZUL_OWNER': 'hannes@ucsc.edu' if is_sandbox else None, - - 'AZUL_MONITORING_EMAIL': '{AZUL_OWNER}', - - 'AZUL_AWS_ACCOUNT_ID': '289950828509', - 'AWS_DEFAULT_REGION': 'us-east-1', - - 'GOOGLE_PROJECT': 'platform-anvil-dev', - - 'AZUL_DEPLOYMENT_INCARNATION': '2', - - 'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '561542988117-cpo2avhomdh6t7fetp91js78cdhm9p47.apps.googleusercontent.com', - } diff --git a/deployments/anvildev.browser/environment.py b/deployments/anvildev.browser/environment.py deleted file mode 100644 index bc4f7ce197..0000000000 --- a/deployments/anvildev.browser/environment.py +++ /dev/null @@ -1,41 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. 
String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - 'azul_terraform_component': 'browser', - 'azul_browser_sites': json.dumps({ - 'browser': { - 'zone': '{AZUL_DOMAIN_NAME}', - 'domain': '{AZUL_DOMAIN_NAME}', - 'project': 'ucsc/data-browser', - 'branch': 'ucsc/anvil/anvildev', - 'tarball_name': 'anvil', - 'tarball_path': 'out', - 'real_path': '' - } - }) - } diff --git a/deployments/anvildev.gitlab/environment.py b/deployments/anvildev.gitlab/environment.py deleted file mode 100644 index 95178a85a5..0000000000 --- a/deployments/anvildev.gitlab/environment.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections.abc import ( - Mapping, -) -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. 
- """ - return { - 'azul_terraform_component': 'gitlab', - 'azul_vpc_cidr': '172.23.0.0/16', - 'azul_vpn_subnet': '10.44.0.0/16' - } diff --git a/deployments/anvildev.shared/environment.py b/deployments/anvildev.shared/environment.py deleted file mode 120000 index 1a13b702a8..0000000000 --- a/deployments/anvildev.shared/environment.py +++ /dev/null @@ -1 +0,0 @@ -../dev.shared/environment.py \ No newline at end of file diff --git a/deployments/anvildev/.example.environment.local.py b/deployments/anvildev/.example.environment.local.py deleted file mode 120000 index 45b1b10142..0000000000 --- a/deployments/anvildev/.example.environment.local.py +++ /dev/null @@ -1 +0,0 @@ -../dev/.example.environment.local.py \ No newline at end of file diff --git a/deployments/anvildev/environment.py b/deployments/anvildev/environment.py deleted file mode 100644 index 2c33d50286..0000000000 --- a/deployments/anvildev/environment.py +++ /dev/null @@ -1,153 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Literal, - Optional, -) - -pop = 1 # remove snapshot - - -def bqsrc(google_project: str, - snapshot: str, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - assert len(google_project) == 8, google_project - project = 'datarepo-dev-' + google_project - assert not snapshot.startswith('ANVIL_'), snapshot - snapshot = 'ANVIL_' + snapshot - return mksrc('bigquery', project, snapshot, flags, prefix) - - -def mksrc(source_type: Literal['bigquery', 'parquet'], - google_project, - snapshot, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - project = '_'.join(snapshot.split('_')[1:-3]) - assert flags <= pop - source = None if flags & pop else ':'.join([ - 'tdr', - source_type, - 'gcp', - google_project, - snapshot, - prefix - ]) - return project, source - - -def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]: - result = dict(items) - assert len(items) == len(result), 'collisions detected' - assert list(result.keys()) == sorted(result.keys()), 'input not sorted' - return result - - -def mklist(catalog: dict[str, str]) -> list[str]: - return list(filter(None, catalog.values())) - - -def mkdict(previous_catalog: dict[str, str], - num_expected: int, - delta: dict[str, str] - ) -> dict[str, str]: - catalog = previous_catalog | delta - num_actual = len(mklist(catalog)) - assert num_expected == num_actual, (num_expected, num_actual) - return catalog - - -anvil_sources = mkdict({}, 3, mkdelta([ - bqsrc('e53e74aa', '1000G_2019_Dev_20230609_ANV5_202306121732'), - bqsrc('42c70e6a', 'CCDG_Sample_1_20230228_ANV5_202302281520'), - bqsrc('97ad270b', 'CMG_Sample_1_20230225_ANV5_202302281509') -])) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. 
- """ - return { - # Set variables for the `anvildev` (short for AnVIL development) - # deployment here. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. Settings that are applicable to all environments but - # specific to you go into `environment.local.py` at the project root. - - 'AZUL_DEPLOYMENT_STAGE': 'anvildev', - - 'AZUL_DOMAIN_NAME': 'anvil.gi.ucsc.edu', - - 'AZUL_CATALOGS': json.dumps({ - f'{catalog}{suffix}': dict(atlas=atlas, - internal=internal, - plugins=dict(metadata=dict(name='anvil'), - repository=dict(name='tdr_anvil')), - sources=list(filter(None, sources.values()))) - for atlas, catalog, sources in [ - ('anvil', 'anvil', anvil_sources), - ] - for suffix, internal in [ - ('', False), - ('-it', True) - ] - }), - - 'AZUL_TDR_SOURCE_LOCATION': 'us-central1', - 'AZUL_TDR_SERVICE_URL': 'https://jade.datarepo-dev.broadinstitute.org', - 'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-dev.broadinstitute.org', - 'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-dev.broadinstitute.org', - 'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-dev.broadinstitute.org', - - 'AZUL_ENABLE_MONITORING': '1', - - 'AZUL_ES_INSTANCE_TYPE': 'r6gd.large.search', - 'AZUL_ES_INSTANCE_COUNT': '2', - - 'AZUL_DEBUG': '1', - - 'AZUL_BILLING': 'anvil', - - 'AZUL_OWNER': 'hannes@ucsc.edu', - - 'AZUL_MONITORING_EMAIL': 'azul-group@ucsc.edu', - - 'AZUL_AWS_ACCOUNT_ID': '289950828509', - 'AWS_DEFAULT_REGION': 'us-east-1', - - 'GOOGLE_PROJECT': 'platform-anvil-dev', - - 'AZUL_DEPLOYMENT_INCARNATION': '2', - - 'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '561542988117-3cv4g8ii9enl2000ra6m02r3ne7bgnth.apps.googleusercontent.com', - - 'azul_slack_integration': json.dumps({ - 'workspace_id': 'T09P9H91S', # ucsc-gi.slack.com - 'channel_id': 'C04K4BQET7G' # #team-boardwalk-anvildev - }), - } diff --git a/deployments/anvilprod.browser/environment.py b/deployments/anvilprod.browser/environment.py deleted file mode 100644 index 221a18df02..0000000000 --- a/deployments/anvilprod.browser/environment.py +++ /dev/null @@ -1,41 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. 
- """ - return { - 'azul_terraform_component': 'browser', - 'azul_browser_sites': json.dumps({ - 'browser': { - 'zone': '{AZUL_DOMAIN_NAME}', - 'domain': '{AZUL_DOMAIN_NAME}', - 'project': 'ucsc/data-browser', - 'branch': 'ucsc/anvil/anvilprod', - 'tarball_name': 'anvil', - 'tarball_path': 'out', - 'real_path': '' - } - }) - } diff --git a/deployments/anvilprod.gitlab/environment.py b/deployments/anvilprod.gitlab/environment.py deleted file mode 100644 index cda3ae93f0..0000000000 --- a/deployments/anvilprod.gitlab/environment.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections.abc import ( - Mapping, -) -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - 'azul_terraform_component': 'gitlab', - 'azul_vpc_cidr': '172.24.0.0/16', - 'azul_vpn_subnet': '10.45.0.0/16' - } diff --git a/deployments/anvilprod.shared/environment.py b/deployments/anvilprod.shared/environment.py deleted file mode 100644 index 58e7438802..0000000000 --- a/deployments/anvilprod.shared/environment.py +++ /dev/null @@ -1,29 +0,0 @@ -from collections.abc import ( - Mapping, -) -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. 
- """ - return { - 'azul_terraform_component': 'shared', - } diff --git a/deployments/anvilprod/.example.environment.local.py b/deployments/anvilprod/.example.environment.local.py deleted file mode 120000 index 4f077d1558..0000000000 --- a/deployments/anvilprod/.example.environment.local.py +++ /dev/null @@ -1 +0,0 @@ -../prod/.example.environment.local.py \ No newline at end of file diff --git a/deployments/anvilprod/environment.py b/deployments/anvilprod/environment.py deleted file mode 100644 index 6766f5a402..0000000000 --- a/deployments/anvilprod/environment.py +++ /dev/null @@ -1,1284 +0,0 @@ -import base64 -import bz2 -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Literal, - Optional, -) - -pop = 1 # remove snapshot - - -def bqsrc(google_project: str, - snapshot: str, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - assert len(google_project) == 8, google_project - project = 'datarepo-' + google_project - # Some snapshots start with AnVIL instead of ANVIL - if not snapshot.upper().startswith('ANVIL_'): - snapshot = 'ANVIL_' + snapshot - return mksrc('bigquery', project, snapshot, flags, prefix) - - -def mksrc(source_type: Literal['bigquery', 'parquet'], - google_project, - snapshot, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - project = '_'.join(snapshot.split('_')[1:-3]) - assert flags <= pop - source = None if flags & pop else ':'.join([ - 'tdr', - source_type, - 'gcp', - google_project, - snapshot, - prefix - ]) - return project, source - - -def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]: - result = dict(items) - assert len(items) == len(result), 'collisions detected' - assert list(result.keys()) == sorted(result.keys()), 'input not sorted' - return result - - -def mklist(catalog: dict[str, str]) -> list[str]: - return list(filter(None, catalog.values())) - - -def mkdict(previous_catalog: dict[str, str], - num_expected: int, - delta: dict[str, str] - ) -> dict[str, str]: - catalog = previous_catalog | delta - num_actual = len(mklist(catalog)) - assert num_expected == num_actual, (num_expected, num_actual) - return catalog - - -anvil_sources = mkdict({}, 11, mkdelta([ - bqsrc('3edb7fb1', '1000G_high_coverage_2019_20230517_ANV5_202305181946'), - bqsrc('db7353fb', 'CMG_UWASH_DS_BAV_IRB_PUB_RD_20230419_ANV5_202304201858'), - bqsrc('3b8ef67a', 'CMG_UWASH_DS_BDIS_20230418_ANV5_202304201958'), - bqsrc('5d27ebfe', 'CMG_UWASH_DS_HFA_20230418_ANV5_202304201932'), - bqsrc('9d1a6e0a', 'CMG_UWASH_DS_NBIA_20230418_ANV5_202304201949'), - bqsrc('3243df15', 'CMG_UWASH_HMB_20230418_ANV5_202304201923'), - bqsrc('50484f86', 'CMG_UWASH_HMB_IRB_20230418_ANV5_202304201915'), - bqsrc('74bd0964', 'CMG_UWash_DS_EP_20230419_ANV5_202304201906'), - bqsrc('e5914f89', 'CMG_UWash_GRU_20230418_ANV5_202304201848'), - bqsrc('97ec5366', 'CMG_UWash_GRU_IRB_20230418_ANV5_202304201940'), - bqsrc('4150bd87', 'GTEx_V8_hg38_20230419_ANV5_202304202007') -])) - -anvil1_sources = mkdict(anvil_sources, 63, mkdelta([ - bqsrc('d53aa186', 'CMG_BROAD_BRAIN_ENGLE_WES_20221102_ANV5_202304241525'), - bqsrc('69b2535a', 'CMG_BROAD_BRAIN_SHERR_WGS_20221102_ANV5_202304241530'), - bqsrc('490be510', 'CMG_BROAD_ORPHAN_SCOTT_WGS_20221102_ANV5_202304241538'), - bqsrc('3b33c41b', 'CMG_Broad_Blood_Gazda_WES_20221117_ANV5_202304241459'), - bqsrc('96df3cea', 'CMG_Broad_Blood_Sankaran_WES_20221117_ANV5_202304241501'), - bqsrc('179ee079', 'CMG_Broad_Blood_Sankaran_WGS_20221117_ANV5_202304241503'), - bqsrc('3dd4d039', 
'CMG_Broad_Brain_Gleeson_WES_20221117_ANV5_202304241517'), - bqsrc('c361373f', 'CMG_Broad_Brain_Muntoni_WES_20221102_ANV5_202304241527'), - bqsrc('12ac342c', 'CMG_Broad_Brain_NeuroDev_WES_20221102_ANV5_202304241529'), - bqsrc('d7bfafc6', 'CMG_Broad_Brain_Thaker_WES_20221102_ANV5_202304241531'), - bqsrc('29812b42', 'CMG_Broad_Eye_Pierce_WES_20221205_ANV5_202304242250'), - bqsrc('48134558', 'CMG_Broad_Eye_Pierce_WGS_20221117_ANV5_202304241507'), - bqsrc('36ebaa12', 'CMG_Broad_Heart_PCGC_Tristani_WGS_20221025_ANV5_202304211840'), - bqsrc('f9826139', 'CMG_Broad_Heart_Seidman_WES_20221117_ANV5_202304241504'), - bqsrc('85952af8', 'CMG_Broad_Kidney_Hildebrandt_WES_20230525_ANV5_202305251733'), - bqsrc('ee4ae9a1', 'CMG_Broad_Kidney_Hildebrandt_WGS_20221025_ANV5_202304211844'), - bqsrc('cf168274', 'CMG_Broad_Kidney_Pollak_WES_20221025_ANV5_202304211846'), - bqsrc('4d47ba2c', 'CMG_Broad_Muscle_Beggs_WGS_20221102_ANV5_202304241533'), - bqsrc('82d1271a', 'CMG_Broad_Muscle_Bonnemann_WES_20221117_ANV5_202304241509'), - bqsrc('6be3fb25', 'CMG_Broad_Muscle_Bonnemann_WGS_20221117_ANV5_202304241510'), - bqsrc('b168eb10', 'CMG_Broad_Muscle_KNC_WES_20221116_ANV5_202304242219'), - bqsrc('372244aa', 'CMG_Broad_Muscle_KNC_WGS_20221117_ANV5_202304242221'), - bqsrc('77a6c0aa', 'CMG_Broad_Muscle_Kang_WGS_20221025_ANV5_202304211849'), - bqsrc('736a5f1f', 'CMG_Broad_Muscle_Laing_WES_20221208_ANV5_202304271308'), - bqsrc('5019143b', 'CMG_Broad_Muscle_Myoseq_WES_20230621_ANV5_202306211852'), - bqsrc('27eb651a', 'CMG_Broad_Muscle_Myoseq_WGS_20221208_ANV5_202304271310'), - bqsrc('c087af7a', 'CMG_Broad_Muscle_OGrady_WES_20221205_ANV5_202304242252'), - bqsrc('db987a2e', 'CMG_Broad_Muscle_Ravenscroft_WES_20221208_ANV5_202304271311'), - bqsrc('05df566c', 'CMG_Broad_Muscle_Topf_WES_20221208_ANV5_202304271313'), - bqsrc('87d91f06', 'CMG_Broad_Orphan_Chung_WES_20221102_ANV5_202304241534'), - bqsrc('25f6b696', 'CMG_Broad_Orphan_Estonia_Ounap_WES_20221117_ANV5_202304241512'), - bqsrc('c3b16b41', 'CMG_Broad_Orphan_Estonia_Ounap_WGS_20221205_ANV5_202304242255'), - bqsrc('e2976b05', 'CMG_Broad_Orphan_Jueppner_WES_20221102_ANV5_202304241535'), - bqsrc('32fe2260', 'CMG_Broad_Orphan_Lerner_Ellis_WES_20221102_ANV5_202304241536'), - bqsrc('6f9e574e', 'CMG_Broad_Orphan_Manton_WES_20221117_ANV5_202304241513'), - bqsrc('53cd689b', 'CMG_Broad_Orphan_Manton_WGS_20221117_ANV5_202304241515'), - bqsrc('e7c5babf', 'CMG_Broad_Orphan_Scott_WES_20221025_ANV5_202304241458'), - bqsrc('051877f4', 'CMG_Broad_Orphan_Sweetser_WES_20221102_ANV5_202304241539'), - bqsrc('555c7706', 'CMG_Broad_Orphan_VCGS_White_WES_20221018_ANV5_202304241522'), - bqsrc('3a8f7952', 'CMG_Broad_Orphan_VCGS_White_WGS_20221117_ANV5_202304241523'), - bqsrc('b699c5e3', 'CMG_Broad_Rare_RGP_WES_20221102_ANV5_202304241540'), - bqsrc('2d5bd095', 'CMG_Broad_Stillbirth_Wilkins_Haug_WES_20221102_ANV5_202304241542'), - bqsrc('f3d0eda6', 'CMG_UWash_GRU_20230418_ANV5_202306211828'), - bqsrc('ab5c3fa5', 'CMG_YALE_DS_RARED_20221020_ANV5_202304211812'), - bqsrc('d51578f4', 'CMG_Yale_GRU_20221020_ANV5_202304211517'), - bqsrc('bcedc554', 'CMG_Yale_HMB_20221020_ANV5_202304211813'), - bqsrc('f485fa3e', 'CMG_Yale_HMB_GSO_20221020_ANV5_202304211519'), - bqsrc('45487b69', 'GTEx_Somatic_WGS_20230331_ANV5_202304211636'), - bqsrc('5ebc368c', 'GTEx_V7_hg19_20221128_ANV5_202304211804'), - bqsrc('864913f2', 'GTEx_V9_hg38_20221128_ANV5_202304211853'), - bqsrc('b093b69d', 'GTEx_public_data_20221115_ANV5_202304211659'), - bqsrc('d948d21a', 'cmg_broad_brain_engle_wgs_20221202_ANV5_202304271345'), - 
bqsrc('1cb73890', 'cmg_broad_heart_ware_wes_20221215_ANV5_202304242145'), -])) - -anvil2_sources = mkdict(anvil1_sources, 104, mkdelta([ - bqsrc('36124817', 'African_American_Seq_HGV_20230727_ANV5_202308291753'), - bqsrc('d795027d', 'CCDG_Broad_CVD_AF_VAFAR_Arrays_20221020_ANV5_202304211823'), - bqsrc('642829f3', 'CCDG_Broad_CVD_AF_VAFAR_WES_20221024_ANV5_202304211826'), - bqsrc('08216a2c', 'CCDG_Broad_CVD_AFib_Vanderbilt_Ablation_WGS_20221020_ANV5_202304211819'), - bqsrc('74975e89', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20221220_ANV5_202304271548'), - bqsrc('ad61c47e', 'CCDG_NHGRI_Broad_ASD_Daly_phs000298_WES_vcf_20230403_ANV5_202304271610'), - bqsrc('5e719362', 'CCDG_NYGC_AI_Asthma_Gala2_WGS_20230605_ANV5_202306131248'), - bqsrc('2734a0e4', 'CCDG_NYGC_NP_Alz_EFIGA_WGS_20230605_ANV5_202306141705'), - bqsrc('710fc60d', 'CCDG_NYGC_NP_Alz_LOAD_WGS_20230605_ANV5_202306131256'), - bqsrc('9626b3eb', 'CCDG_NYGC_NP_Alz_WHICAP_WGS_20230605_ANV5_202306131303'), - bqsrc('25ec7b57', 'CCDG_WASHU_PAGE_20221220_ANV5_202304271544'), - bqsrc('6d8536f4', 'CMH_GAFK_GS_linked_read_20221107_ANV5_202304211527'), - bqsrc('482ab960', 'CMH_GAFK_GS_long_read_20221109_ANV5_202304211529'), - bqsrc('8745e97d', 'CMH_GAFK_scRNA_20221107_ANV5_202304211533'), - bqsrc('1c89dcac', 'CSER_CHARM_GRU_20221208_ANV5_202304271348'), - bqsrc('12d56848', 'CSER_NCGENES2_GRU_20221208_ANV5_202304271349'), - bqsrc('8a4d67ef', 'CSER_SouthSeq_GRU_20221208_ANV5_202304271351'), - bqsrc('f622180d', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_VillageData_20230109_ANV5_202304242045'), - bqsrc('732d1a55', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20230109_ANV5_202304242048'), - bqsrc('90bab913', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_Finkel_SMA_DS_WGS_20230109_ANV5_202304242043'), - bqsrc('e4eb7641', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_WGS_20221115_ANV5_202304242052'), - bqsrc('f9aef3dc', 'NIMH_Broad_WGSPD1_McCarroll_Escamilla_DS_WGS_20221103_ANV5_202304242049'), - bqsrc('aca6a582', 'NIMH_CIRM_FCDI_ConvergentNeuro_McCarroll_Eggan_GRU_Arrays_20230109_ANV5_202304242046'), - bqsrc('06abb598', 'PAGE_BioMe_GRU_WGS_20221128_ANV5_202304211817'), - bqsrc('7c4410ed', 'PAGE_MEC_GRU_WGS_20230131_ANV5_202304211721'), - bqsrc('84d2e3b1', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20221128_ANV5_202304211827'), - bqsrc('ffbc38fd', 'PAGE_WHI_HMB_IRB_WGS_20221019_ANV5_202304211722'), - bqsrc('b1f3e0d1', 'ccdg_asc_ndd_daly_talkowski_cdcseed_asd_gsa_md_20221024_ANV5_202304211749'), - bqsrc('11330a21', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_gsa_md_20221025_ANV5_202304211759'), - bqsrc('86a1dbf3', 'ccdg_broad_ai_ibd_daly_bernstein_gsa_20221025_ANV5_202304241921'), - bqsrc('833ff0a3', 'eMERGE_GRU_IRB_NPU_eMERGEseq_20230130_ANV5_202304271614'), - bqsrc('baf040af', 'eMERGE_GRU_IRB_PUB_NPU_eMERGEseq_20230130_ANV5_202304271616'), - bqsrc('270b3b62', 'eMERGE_GRU_IRB_eMERGEseq_20230130_ANV5_202304271613'), - bqsrc('c13efbe9', 'eMERGE_GRU_NPU_eMERGEseq_20230130_ANV5_202304271617'), - bqsrc('34f8138d', 'eMERGE_GRU_eMERGEseq_20230130_ANV5_202304271612'), - bqsrc('90b7b6e8', 'eMERGE_HMB_GSO_eMERGEseq_20230130_ANV5_202304271621'), - bqsrc('6e6dca92', 'eMERGE_HMB_IRB_PUB_eMERGEseq_20230130_ANV5_202304271622'), - bqsrc('1ddf2a8e', 'eMERGE_HMB_NPU_eMERGEseq_20230130_ANV5_202304271624'), - bqsrc('dba97a65', 'eMERGE_HMB_eMERGEseq_20230130_ANV5_202304271619'), - bqsrc('51aa9a22', 'eMERGE_PGRNseq_20230118_ANV5_202304241853'), - bqsrc('ce8c469f', 'eMERGE_PRS_Arrays_20221220_ANV5_202304271346') -])) - -anvil3_sources = mkdict(anvil2_sources, 
151, mkdelta([ - bqsrc('9a74aed3', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202310091900'), - bqsrc('0768a322', 'CCDG_Broad_CVD_AF_Ellinor_MGH_Arrays_20221024_ANV5_202304211831'), - bqsrc('2b135baf', 'CCDG_Broad_CVD_AFib_MGH_WGS_20221024_ANV5_202304211829'), - bqsrc('96b594f9', 'CCDG_Broad_CVD_EOCAD_TaiChi_WGS_20221026_ANV5_202310101655'), - bqsrc('318ae48e', 'CCDG_Broad_CVD_Stroke_BRAVE_WGS_20221107_ANV5_202304241543'), - bqsrc('7ea7a6e9', 'CCDG_Broad_MI_BRAVE_GRU_WES_20221107_ANV5_202304241545'), - bqsrc('2339e241', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20221101_ANV5_202304241613'), - bqsrc('cd6cee03', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20221101_ANV5_202304241612'), - bqsrc('da88c3ce', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20221101_ANV5_202304241657'), - bqsrc('2b361bda', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20221026_ANV5_202304241549'), - bqsrc('6eeff3fc', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20221108_ANV5_202304241701'), - bqsrc('21923ed0', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20221027_ANV5_202304241556'), - bqsrc('5b10132b', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20230328_ANV5_202304241552'), - bqsrc('d2d5ba15', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20221108_ANV5_202304241702'), - bqsrc('fc0a35a8', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20221108_ANV5_202304241704'), - bqsrc('f14cd6d7', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20230328_ANV5_202304241659'), - bqsrc('3832cf81', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20221026_ANV5_202304241548'), - bqsrc('098aadb0', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20221026_ANV5_202304241551'), - bqsrc('d9ea4f23', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20221027_ANV5_202304241554'), - bqsrc('0c9ab563', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20221108_ANV5_202304241705'), - bqsrc('a383d752', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20221108_ANV5_202304241707'), - bqsrc('03b52641', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20221027_ANV5_202304241601'), - bqsrc('2e9ab296', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20221108_ANV5_202304241605'), - bqsrc('89162c54', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20221027_ANV5_202304241609'), - bqsrc('fd5cd738', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20230328_ANV5_202304241602'), - bqsrc('d987821a', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20221108_ANV5_202304241709'), - bqsrc('b93e1cfa', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20221027_ANV5_202304241604'), - bqsrc('2e9630dd', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20221027_ANV5_202304241559'), - bqsrc('ee58a7a9', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20230612_ANV5_202306131343'), - bqsrc('ff5356bb', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20221027_ANV5_202304241555'), - bqsrc('2262daa7', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20221027_ANV5_202304241610'), - bqsrc('2a947c33', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20221027_ANV5_202304241558'), - bqsrc('5b3c42e1', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20221110_ANV5_202304241713'), - bqsrc('91b4b33c', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20230328_ANV5_202306211900'), - bqsrc('e4fe111a', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20221108_ANV5_202304241607'), - bqsrc('8b120833', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20230328_ANV5_202304241716'), - bqsrc('f051499d', 'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20230328_ANV5_202304241720'), - bqsrc('fd49a493', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_DS_WGS_20230525_ANV5_202306211841'), - 
bqsrc('076da44b', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202306211847'), - bqsrc('7e03b5fd', 'CMG_Broad_Brain_Walsh_WES_20230605_ANV5_202310101734'), - bqsrc('c43e7400', 'CMG_Broad_Muscle_Kang_WES_20230525_ANV5_202310101649'), - bqsrc('14f5afa3', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_10XLRGenomes_20221115_ANV5_202310101713'), - bqsrc('94091a22', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_10XLRGenomes_20230331_ANV5_202310101715'), - bqsrc('55b75002', 'PAGE_SoL_HMB_WGS_20221220_ANV5_202310061302'), - bqsrc('02ad84ea', 'T2T_20230714_ANV5_202310101616'), - bqsrc('08cd15a2', 'ccdg_washu_ai_t1d_t1dgc_wgs_20221031_ANV5_202304211552'), - bqsrc('e3065356', 'ccdg_washu_cvd_eocad_biome_wgs_20221024_ANV5_202304211601'), -])) - -anvil4_sources = mkdict(anvil3_sources, 200, mkdelta([ - bqsrc('1a86e7ca', 'CCDG_Baylor_CVD_AFib_Groningen_WGS_20221122_ANV5_202304242224'), - bqsrc('92716a90', 'CCDG_Baylor_CVD_AFib_VAFAR_HMB_IRB_WGS_20221020_ANV5_202304211525'), - bqsrc('77445496', 'CCDG_Baylor_CVD_EOCAD_BioMe_WGS_20221122_ANV5_202304242226'), - bqsrc('1b0d6b90', 'CCDG_Baylor_CVD_HHRC_Brownsville_GRU_WGS_20221122_ANV5_202304242228'), - bqsrc('373b7918', 'CCDG_Baylor_CVD_HemStroke_BNI_HMB_WGS_20221215_ANV5_202304242306'), - bqsrc('efc3e806', 'CCDG_Baylor_CVD_HemStroke_Duke_DS_WGS_20221117_ANV5_202304242122'), - bqsrc('1044f96d', 'CCDG_Baylor_CVD_HemStroke_ERICH_WGS_20221207_ANV5_202304271256'), - bqsrc('f23a6ec8', 'CCDG_Baylor_CVD_HemStroke_GERFHS_HMB_WGS_20221215_ANV5_202304242307'), - bqsrc('de34ca6e', 'CCDG_Baylor_CVD_HemStroke_Regards_DS_WGS_20221117_ANV5_202304242123'), - bqsrc('d9c6f406', 'CCDG_Baylor_CVD_HemStroke_Yale_HMB_WGS_20221215_ANV5_202304242309'), - bqsrc('56883e56', 'CCDG_Baylor_CVD_Oregon_SUDS_GRU_WGS_20221215_ANV5_202304242302'), - bqsrc('7f3ba7ec', 'CCDG_Baylor_CVD_TexGen_DS_WGS_20221117_ANV5_202304242125'), - bqsrc('da965e26', 'CCDG_Baylor_CVD_Ventura_Presto_GRU_IRB_WGS_20221117_ANV5_202304242127'), - bqsrc('906bf803', 'CCDG_Broad_AI_IBD_Brant_DS_IBD_WGS_20221110_ANV5_202304241911'), - bqsrc('343ca1c3', 'CCDG_Broad_AI_IBD_Brant_HMB_WGS_20221110_ANV5_202304241912'), - bqsrc('80a63603', 'CCDG_Broad_AI_IBD_Cho_WGS_20230313_ANV5_202304241903'), - bqsrc('a98e7a43', 'CCDG_Broad_AI_IBD_Kugathasan_WGS_20221110_ANV5_202304241906'), - bqsrc('381bc957', 'CCDG_Broad_AI_IBD_McCauley_WGS_20221110_ANV5_202304241914'), - bqsrc('6a10165d', 'CCDG_Broad_AI_IBD_McGovern_WGS_20221110_ANV5_202304241907'), - bqsrc('a2743c82', 'CCDG_Broad_AI_IBD_Newberry_WGS_20221025_ANV5_202304241901'), - bqsrc('ed109b2f', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_Arrays_20230612_ANV5_202306131350'), - bqsrc('3d8b62d7', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_WES_20221025_ANV5_202304241856'), - bqsrc('450ba911', 'CCDG_Broad_CVD_AF_ENGAGE_DS_WES_20230418_ANV5_202304210808'), - bqsrc('dfabf632', 'CCDG_Broad_CVD_AF_Ellinor_MGH_WES_20221117_ANV5_202304271354'), - bqsrc('485eb707', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_Arrays_20230128_ANV5_202304271554'), - bqsrc('58dffe5a', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_Arrays_20221103_ANV5_202304242105'), - bqsrc('cf7f2c0c', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_WES_20221103_ANV5_202304242107'), - bqsrc('f896734e', 'CCDG_Broad_CVD_AF_Marcus_UCSF_Arrays_20221102_ANV5_202304242039'), - bqsrc('40c2f4f4', 'CCDG_Broad_CVD_AF_Marcus_UCSF_WES_20221222_ANV5_202304242040'), - bqsrc('67117555', 'CCDG_Broad_CVD_AF_Rienstra_WES_20221222_ANV5_202304242035'), - bqsrc('c45dd622', 'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_Arrays_20221103_ANV5_202304242110'), - bqsrc('b12d2e52', 
'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_WES_20230118_ANV5_202304242112'), - bqsrc('43f6230a', 'CCDG_Broad_CVD_AFib_AFLMU_WGS_20231008_ANV5_202310091911'), - bqsrc('de64d25a', 'CCDG_Broad_CVD_AFib_UCSF_WGS_20221222_ANV5_202304242037'), - bqsrc('e25350dd', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_Arrays_20230517_ANV5_202310101704'), - bqsrc('9921a6fa', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_WES_20230621_ANV5_202306211933'), - bqsrc('383d9d9b', 'CCDG_Broad_CVD_PROMIS_GRU_WES_20230418_ANV5_202306211912'), - bqsrc('5df71da4', 'CCDG_Broad_MI_InStem_WES_20221122_ANV5_202304242236'), - bqsrc('1793828c', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSRS_WES_20230324_ANV5_202304241752'), - bqsrc('d44547dc', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202304271556'), - bqsrc('70c803d7', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_GSA_MD_20221117_ANV5_202304271400'), - bqsrc('f5a4a895', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_GSA_MD_20221117_ANV5_202304271358'), - bqsrc('b8b8ba44', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_GSA_MD_20221117_ANV5_202304271356'), - bqsrc('0b0ca621', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202306211945'), - bqsrc('f85048a3', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_GSA_MD_20221117_ANV5_202304271401'), - bqsrc('68037179', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202304271559'), - bqsrc('025215fc', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202304271601'), - bqsrc('92905a2b', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_GSA_MD_20221117_ANV5_202304271403'), - bqsrc('3f3ad5c7', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_GSA_MD_20230118_ANV5_202304271404') -])) - -anvil5_sources = mkdict(anvil4_sources, 261, mkdelta([ - bqsrc('3c30a9a2', '1000G_high_coverage_2019_20230517_ANV5_202403030329'), - bqsrc('adf70694', 'ALS_FTD_ALS_AssociatedGenes_GRU_v1_20231221_ANV5_202401112025'), - bqsrc('815ad21b', 'ALS_FTD_DEMENTIA_SEQ_GRU_v1_20231221_ANV5_202401112033'), - bqsrc('ab46a8e4', 'CCDG_NYGC_NP_Autism_ACE2_DS_MDS_WGS_20230605_ANV5_202403032021'), - bqsrc('df058a48', 'CCDG_NYGC_NP_Autism_AGRE_WGS_20230605_ANV5_202403032044'), - bqsrc('61910b61', 'CCDG_NYGC_NP_Autism_CAG_DS_WGS_20230605_ANV5_202403032053'), - bqsrc('8d6472a1', 'CCDG_NYGC_NP_Autism_HFA_DS_WGS_20230605_ANV5_202403032108'), - bqsrc('f0a12498', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_DS_WGS_20221103_ANV5_202403032124'), - bqsrc('f06dc5dd', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_GRU_WGS_20221103_ANV5_202403032131'), - bqsrc('b791f5c1', 'CCDG_NYGC_NP_Autism_SAGE_WGS_20230605_ANV5_202403032137'), - bqsrc('b9222139', 'CMG_BROAD_BRAIN_ENGLE_WES_20240205_ANV5_202402051624'), - bqsrc('7e094253', 'CMG_BROAD_BRAIN_SHERR_WGS_20221102_ANV5_202402281543'), - bqsrc('c797490f', 'CMG_BROAD_ORPHAN_SCOTT_WGS_20221102_ANV5_202402281552'), - bqsrc('0a1360b1', 'CMG_Broad_Blood_Gazda_WES_20221117_ANV5_202402290547'), - bqsrc('faa71b49', 'CMG_Broad_Blood_Sankaran_WES_20221117_ANV5_202402290555'), - bqsrc('abce6387', 'CMG_Broad_Blood_Sankaran_WGS_20221117_ANV5_202402290606'), - bqsrc('4153ad1f', 'CMG_Broad_Muscle_Laing_WES_20221208_ANV5_202402291926'), - bqsrc('5bbb5a28', 'CMG_Broad_Orphan_Jueppner_WES_20240205_ANV5_202402051640'), - bqsrc('18bd3df4', 'CMG_UWASH_HMB_20230418_ANV5_202402070029'), - bqsrc('6f4155f2', 'CMG_UWash_GRU_20240301_ANV5_202403040330'), - bqsrc('6486ae96', 'CMG_UWash_GRU_1_20240113_ANV5_202401141440'), - bqsrc('0fad0f77', 'CMG_YALE_DS_RARED_20221020_ANV5_202402281620'), - bqsrc('ad307392', 'CMG_Yale_GRU_20221020_ANV5_202402281628'), - bqsrc('fecab5bc', 
'CMG_Yale_HMB_20221020_ANV5_202402290926'), - bqsrc('f9699204', 'CMG_Yale_HMB_GSO_20221020_ANV5_202402290935'), - bqsrc('c5bd892a', 'CMH_GAFK_GS_linked_read_20221107_ANV5_202402290945'), - bqsrc('5e64223a', 'CMH_GAFK_GS_long_read_20240301_ANV5_202403040349'), - bqsrc('ba97c05c', 'CMH_GAFK_scRNA_20221107_ANV5_202402291004'), - bqsrc('2659c380', 'CSER_CHARM_GRU_20240301_ANV5_202403040357'), - bqsrc('0f2e95ad', 'CSER_KidsCanSeq_GRU_20221208_ANV5_202402292138'), - bqsrc('62a0bd6d', 'CSER_NCGENES2_GRU_20221208_ANV5_202402292147'), - bqsrc('df02801a', 'CSER_NYCKIDSEQ_GRU_20240113_ANV5_202401141520'), - bqsrc('4b9c138d', 'CSER_NYCKIDSEQ_HMB_20240113_ANV5_202401141527'), - bqsrc('f4d60c69', 'CSER_P3EGS_GRU_20230727_ANV5_202402070059'), - bqsrc('fc5ed559', 'CSER_SouthSeq_GRU_20221208_ANV5_202402292154'), - bqsrc('74121c99', 'GTEx_BCM_GRU_CoRSIVs_20240116_ANV5_202401170141'), - bqsrc('1a706b0c', 'GTEx_Somatic_WGS_20240116_ANV5_202401170147'), - bqsrc('e063cf6d', 'GTEx_V7_hg19_20221128_ANV5_202402291034'), - bqsrc('383c097a', 'GTEx_V8_hg38_20240116_ANV5_202401170154'), - bqsrc('701eea84', 'GTEx_V9_hg38_20221128_ANV5_202402070108'), - bqsrc('ff9d78a5', 'GTEx_public_data_20240117_ANV5_202401180400'), - bqsrc('37c3d458', 'NIA_CARD_Coriell_Cell_Lines_Open_20230727_ANV5_202401111624'), - bqsrc('06c78117', 'NIA_CARD_LR_WGS_NABEC_GRU_20230727_ANV5_202401111634'), - bqsrc('e4eb7641', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_WGS_20221115_ANV5_202304242052', pop), - bqsrc('a3880121', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_WGS_20240112_ANV5_202402062129'), - bqsrc('25790186', 'PAGE_BioMe_GRU_WGS_20221128_ANV5_202403040429'), - bqsrc('b371989b', 'PAGE_MEC_GRU_WGS_20230131_ANV5_202403040437'), - bqsrc('4a4eec27', 'PAGE_SoL_HMB_WGS_20221220_ANV5_202403040445'), - bqsrc('a1f917db', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20221128_ANV5_202403040453'), - bqsrc('6264931f', 'PAGE_WHI_HMB_IRB_WGS_20221019_ANV5_202403040500'), - bqsrc('8d62ec8f', 'T2T_20230714_ANV5_202312122150'), - bqsrc('bfabc906', 'ccdg_asc_ndd_daly_talkowski_ac_boston_asd_exome_20221117_ANV5_202403040552'), - bqsrc('825399a4', 'ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome_20221108_ANV5_202403040608'), - bqsrc('e3b070a7', 'ccdg_asc_ndd_daly_talkowski_brusco_asd_exome_20230327_ANV5_202403040615'), - bqsrc('2354d65a', 'ccdg_asc_ndd_daly_talkowski_cdcseed_asd_gsa_md_20221024_ANV5_202402291144'), - bqsrc('0ad3f21a', 'ccdg_asc_ndd_daly_talkowski_chung_asd_exome_20221107_ANV5_202403040623'), - bqsrc('c148a340', 'ccdg_asc_ndd_daly_talkowski_control_NIMH_asd_exome_20221201_ANV5_202403040630'), - bqsrc('bc613fa9', 'ccdg_asc_ndd_daly_talkowski_domenici_asd_exome_20221117_ANV5_202403040637'), - bqsrc('97e22445', 'ccdg_asc_ndd_daly_talkowski_goethe_asd_exome_20221107_ANV5_202403040652'), - bqsrc('72efc816', 'ccdg_asc_ndd_daly_talkowski_herman_asd_exome_20221117_ANV5_202403040701'), - bqsrc('e25caee8', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_exome_20221107_ANV5_202403040708'), - bqsrc('22af2470', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_wgs_20221107_ANV5_202403040716'), - bqsrc('a81009d9', 'ccdg_asc_ndd_daly_talkowski_hultman_asd_exome_20231013_ANV5_202403040723'), - bqsrc('bc078d98', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_exome_20221108_ANV5_202403040731'), - bqsrc('0949186c', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_wgs_20221109_ANV5_202403040739'), - bqsrc('4dc4f939', 'ccdg_asc_ndd_daly_talkowski_lattig_asd_exome_20221122_ANV5_202403040746'), - bqsrc('5ed988f8', 'ccdg_asc_ndd_daly_talkowski_menashe_asd_exome_20221108_ANV5_202403040800'), 
- bqsrc('c6a938e4', 'ccdg_asc_ndd_daly_talkowski_minshew_asd_exome_20221117_ANV5_202403040807'), - bqsrc('a245d786', 'ccdg_asc_ndd_daly_talkowski_palotie_asd_exome_20221019_ANV5_202403040815'), - bqsrc('7ddd7425', 'ccdg_asc_ndd_daly_talkowski_parellada_asd_exome_20221108_ANV5_202403040822'), - bqsrc('aa9f0b28', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_wgs_20221027_ANV5_202403040846'), - bqsrc('0b4c3cfb', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_gsa_md_20221025_ANV5_202402291202'), - bqsrc('8023858b', 'ccdg_asc_ndd_daly_talkowski_weiss_asd_exome_20221108_ANV5_202403040925'), - bqsrc('381b5d80', 'ccdg_broad_ai_ibd_alm_gmc_wes_20230328_ANV5_202403040932'), - bqsrc('714d60b9', 'ccdg_broad_ai_ibd_daly_alm_gmc_gsa_20221025_ANV5_202402291210'), - bqsrc('86a1dbf3', 'ccdg_broad_ai_ibd_daly_bernstein_gsa_20221025_ANV5_202304241921', pop), - bqsrc('dc7a9acd', 'ccdg_broad_ai_ibd_daly_brant_niddk_gsa_20240103_ANV5_202401112147'), - bqsrc('916fc0b6', 'ccdg_broad_ai_ibd_daly_duerr_niddk_gsa_20240113_ANV5_202402062134'), - bqsrc('48d85607', 'ccdg_broad_ai_ibd_daly_hyams_protect_wes_20240104_ANV5_202403041011'), - bqsrc('21d3c731', 'ccdg_broad_ai_ibd_daly_kupcinskas_wes_20240104_ANV5_202403041018'), - bqsrc('614a8519', 'ccdg_broad_ai_ibd_daly_lewis_ccfa_wes_20240113_ANV5_202403041026'), - bqsrc('6799d240', 'ccdg_broad_ai_ibd_daly_lewis_sparc_gsa_20240104_ANV5_202401121517'), - bqsrc('d7ae08a2', 'ccdg_broad_ai_ibd_daly_louis_wes_20240104_ANV5_202403041042'), - bqsrc('9b04a16e', 'ccdg_broad_ai_ibd_daly_mccauley_gsa_20240113_ANV5_202402062137'), - bqsrc('b6a95447', 'ccdg_broad_ai_ibd_daly_mccauley_wes_20240104_ANV5_202403041049'), - bqsrc('df7a6188', 'ccdg_broad_ai_ibd_daly_mcgovern_gsa_20240118_ANV5_202402062140'), - bqsrc('5cd83e88', 'ccdg_broad_ai_ibd_daly_mcgovern_niddk_wes_20240104_ANV5_202403041057'), - bqsrc('fa7e066f', 'ccdg_broad_ai_ibd_daly_mcgovern_share_wes_20240104_ANV5_202401121556', pop), - bqsrc('2def0ed8', 'ccdg_broad_ai_ibd_daly_moayyedi_imagine_gsa_20240105_ANV5_202401121603'), - bqsrc('6e9fe586', 'ccdg_broad_ai_ibd_daly_moayyedi_imagine_wes_20240105_ANV5_202403041109'), - bqsrc('1f3dab2b', 'ccdg_broad_ai_ibd_daly_pekow_share_gsa_20240105_ANV5_202401121646'), - bqsrc('74869ac4', 'ccdg_broad_ai_ibd_daly_pekow_share_wes_20240105_ANV5_202403041133'), - bqsrc('d95b9a73', 'ccdg_broad_ai_ibd_niddk_daly_brant_wes_20240112_ANV5_202403041232'), - bqsrc('7a0883a4', 'ccdg_broad_cvd_af_pegasus_hmb_20221025_ANV5_202403030736'), - bqsrc('f62c5ebd', 'ccdg_broad_cvd_eocad_promis_wgs_20221213_ANV5_202403030935'), - bqsrc('9d116a5c', 'ccdg_broad_mi_atvb_ds_cvd_wes_20221025_ANV5_202403031035'), - bqsrc('bb315b29', 'ccdg_nygc_np_autism_tasc_wgs_20221024_ANV5_202403032216'), - bqsrc('33e3428b', 'ccdg_washu_cvd_np_ai_controls_vccontrols_wgs_20221024_ANV5_202403032319'), - bqsrc('17c5f983', 'cmg_broad_brain_engle_wgs_20221202_ANV5_202402290614'), - bqsrc('a46c0244', 'nhgri_broad_ibd_daly_kugathasan_wes_20240112_ANV5_202403041258'), - bqsrc('4b4f2325', 'nhgri_broad_ibd_daly_turner_wes_20240112_ANV5_202403041307'), -])) - -anvil6_sources = mkdict(anvil5_sources, 249, mkdelta([ - bqsrc('38af6304', '1000G_PRIMED_data_model_20240410_ANV5_202404101419'), - bqsrc('1a86e7ca', 'CCDG_Baylor_CVD_AFib_Groningen_WGS_20221122_ANV5_202304242224', pop), - bqsrc('92716a90', 'CCDG_Baylor_CVD_AFib_VAFAR_HMB_IRB_WGS_20221020_ANV5_202304211525', pop), - bqsrc('e8fc4258', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202403030358'), - bqsrc('77445496', 'CCDG_Baylor_CVD_EOCAD_BioMe_WGS_20221122_ANV5_202304242226', pop), - 
bqsrc('1b0d6b90', 'CCDG_Baylor_CVD_HHRC_Brownsville_GRU_WGS_20221122_ANV5_202304242228', pop), - bqsrc('373b7918', 'CCDG_Baylor_CVD_HemStroke_BNI_HMB_WGS_20221215_ANV5_202304242306', pop), - bqsrc('efc3e806', 'CCDG_Baylor_CVD_HemStroke_Duke_DS_WGS_20221117_ANV5_202304242122', pop), - bqsrc('1044f96d', 'CCDG_Baylor_CVD_HemStroke_ERICH_WGS_20221207_ANV5_202304271256', pop), - bqsrc('f23a6ec8', 'CCDG_Baylor_CVD_HemStroke_GERFHS_HMB_WGS_20221215_ANV5_202304242307', pop), - bqsrc('de34ca6e', 'CCDG_Baylor_CVD_HemStroke_Regards_DS_WGS_20221117_ANV5_202304242123', pop), - bqsrc('d9c6f406', 'CCDG_Baylor_CVD_HemStroke_Yale_HMB_WGS_20221215_ANV5_202304242309', pop), - bqsrc('56883e56', 'CCDG_Baylor_CVD_Oregon_SUDS_GRU_WGS_20221215_ANV5_202304242302', pop), - bqsrc('7f3ba7ec', 'CCDG_Baylor_CVD_TexGen_DS_WGS_20221117_ANV5_202304242125', pop), - bqsrc('da965e26', 'CCDG_Baylor_CVD_Ventura_Presto_GRU_IRB_WGS_20221117_ANV5_202304242127', pop), - bqsrc('40647d03', 'CCDG_Broad_AI_IBD_Brant_DS_IBD_WGS_20240113_ANV5_202401141252'), - bqsrc('83339911', 'CCDG_Broad_AI_IBD_Brant_HMB_WGS_20240113_ANV5_202401141259'), - bqsrc('3f36066b', 'CCDG_Broad_AI_IBD_Cho_WGS_20240113_ANV5_202403030543'), - bqsrc('65e890b6', 'CCDG_Broad_AI_IBD_Kugathasan_WGS_20240113_ANV5_202403030551'), - bqsrc('cec499cd', 'CCDG_Broad_AI_IBD_McCauley_WGS_20240114_ANV5_202403030559'), - bqsrc('8043de16', 'CCDG_Broad_AI_IBD_McGovern_WGS_20240113_ANV5_202403030608'), - bqsrc('de3bfd4e', 'CCDG_Broad_AI_IBD_Newberry_WGS_20240113_ANV5_202403030616'), - bqsrc('ed109b2f', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_Arrays_20230612_ANV5_202306131350', pop), - bqsrc('3d8b62d7', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_WES_20221025_ANV5_202304241856', pop), - bqsrc('450ba911', 'CCDG_Broad_CVD_AF_ENGAGE_DS_WES_20230418_ANV5_202304210808', pop), - bqsrc('0768a322', 'CCDG_Broad_CVD_AF_Ellinor_MGH_Arrays_20221024_ANV5_202304211831', pop), - bqsrc('dfabf632', 'CCDG_Broad_CVD_AF_Ellinor_MGH_WES_20221117_ANV5_202304271354', pop), - bqsrc('485eb707', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_Arrays_20230128_ANV5_202304271554', pop), - bqsrc('58dffe5a', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_Arrays_20221103_ANV5_202304242105', pop), - bqsrc('cf7f2c0c', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_WES_20221103_ANV5_202304242107', pop), - bqsrc('f896734e', 'CCDG_Broad_CVD_AF_Marcus_UCSF_Arrays_20221102_ANV5_202304242039', pop), - bqsrc('40c2f4f4', 'CCDG_Broad_CVD_AF_Marcus_UCSF_WES_20221222_ANV5_202304242040', pop), - bqsrc('67117555', 'CCDG_Broad_CVD_AF_Rienstra_WES_20221222_ANV5_202304242035', pop), - bqsrc('c45dd622', 'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_Arrays_20221103_ANV5_202304242110', pop), - bqsrc('b12d2e52', 'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_WES_20230118_ANV5_202304242112', pop), - bqsrc('d795027d', 'CCDG_Broad_CVD_AF_VAFAR_Arrays_20221020_ANV5_202304211823', pop), - bqsrc('642829f3', 'CCDG_Broad_CVD_AF_VAFAR_WES_20221024_ANV5_202304211826', pop), - bqsrc('43f6230a', 'CCDG_Broad_CVD_AFib_AFLMU_WGS_20231008_ANV5_202310091911', pop), - bqsrc('2b135baf', 'CCDG_Broad_CVD_AFib_MGH_WGS_20221024_ANV5_202304211829', pop), - bqsrc('de64d25a', 'CCDG_Broad_CVD_AFib_UCSF_WGS_20221222_ANV5_202304242037', pop), - bqsrc('08216a2c', 'CCDG_Broad_CVD_AFib_Vanderbilt_Ablation_WGS_20221020_ANV5_202304211819', pop), - bqsrc('342c77f2', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_Arrays_20230517_ANV5_202312122054'), - bqsrc('a16f8bac', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_WES_20230621_ANV5_202403030943'), - bqsrc('f2179275', 'CCDG_Broad_CVD_EOCAD_TaiChi_WGS_20221026_ANV5_202403030955'), - bqsrc('e8ee6358', 
'CCDG_Broad_CVD_EOCAD_VIRGO_WGS_20221024_ANV5_202403031003'), - bqsrc('383d9d9b', 'CCDG_Broad_CVD_PROMIS_GRU_WES_20230418_ANV5_202306211912', pop), - bqsrc('318ae48e', 'CCDG_Broad_CVD_Stroke_BRAVE_WGS_20221107_ANV5_202304241543', pop), - bqsrc('7ea7a6e9', 'CCDG_Broad_MI_BRAVE_GRU_WES_20221107_ANV5_202304241545', pop), - bqsrc('5df71da4', 'CCDG_Broad_MI_InStem_WES_20221122_ANV5_202304242236', pop), - bqsrc('1793828c', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSRS_WES_20230324_ANV5_202304241752', pop), - bqsrc('0db6105c', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202402020211'), - bqsrc('70c803d7', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_GSA_MD_20221117_ANV5_202304271400', pop), - bqsrc('1b92691d', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20221101_ANV5_202403031115'), - bqsrc('f5a4a895', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_GSA_MD_20221117_ANV5_202304271358', pop), - bqsrc('3da39a32', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20221101_ANV5_202403031123'), - bqsrc('b8b8ba44', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_GSA_MD_20221117_ANV5_202304271356', pop), - bqsrc('b3e42c63', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20221101_ANV5_202403031131'), - bqsrc('a2b20d71', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202402020256'), - bqsrc('f85048a3', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_GSA_MD_20221117_ANV5_202304271401', pop), - bqsrc('b3ef2bd3', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20221026_ANV5_202403031140'), - bqsrc('1cafba94', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202402020305'), - bqsrc('006c9286', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202402020314'), - bqsrc('92905a2b', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_GSA_MD_20221117_ANV5_202304271403', pop), - bqsrc('33e1bed9', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20221108_ANV5_202402020322'), - bqsrc('3f3ad5c7', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_GSA_MD_20230118_ANV5_202304271404', pop), - bqsrc('b2a5eccc', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20221027_ANV5_202403031148'), - bqsrc('7a7b911a', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20240201_ANV5_202402020339'), - bqsrc('33634ed0', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20240201_ANV5_202402020347'), - bqsrc('47f93bbb', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20230328_ANV5_202403031156'), - bqsrc('389af3b3', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_WES_20221102_ANV5_202403031205'), - bqsrc('ac8e01aa', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_WES_20230328_ANV5_202403031213'), - bqsrc('5d4aa202', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20221108_ANV5_202403031222'), - bqsrc('bd066b5a', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_WES_20230328_ANV5_202403031231'), - bqsrc('17de3c3b', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20240201_ANV5_202403031239'), - bqsrc('46e7e2ab', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_WES_20230328_ANV5_202403031247'), - bqsrc('ba863f29', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_WES_20221102_ANV5_202403031256'), - bqsrc('113d9969', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20221108_ANV5_202403031305'), - bqsrc('fd6d20c8', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_WES_20230328_ANV5_202403031313'), - bqsrc('55d32c1b', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20230328_ANV5_202403031322'), - bqsrc('844a1ecf', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_WES_20221114_ANV5_202403031331'), - bqsrc('1cbd28a5', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_WES_20230621_ANV5_202403031340'), - bqsrc('b8b0b663', 
'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20221026_ANV5_202403031348'), - bqsrc('2686a76a', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20221027_ANV5_202403031405'), - bqsrc('05e028a4', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20221108_ANV5_202403031413'), - bqsrc('4a6228be', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_WES_20221122_ANV5_202403031421'), - bqsrc('98dddf8f', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_WES_20230328_ANV5_202403031430'), - bqsrc('9ed2a64a', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_WES_20230110_ANV5_202403031439'), - bqsrc('22a9e8bd', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_WES_20221114_ANV5_202403031446'), - bqsrc('517eda47', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_WES_20230328_ANV5_202403031454'), - bqsrc('b6e444c4', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_WES_20230223_ANV5_202403031503'), - bqsrc('d8145bea', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20221108_ANV5_202403031512'), - bqsrc('67c3b200', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20221027_ANV5_202403031520'), - bqsrc('4476c338', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20221108_ANV5_202403031529'), - bqsrc('5cd83a64', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_WES_20221114_ANV5_202403031537'), - bqsrc('5115b904', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20221220_ANV5_202403031547'), - bqsrc('f7fb0742', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20221027_ANV5_202402062057'), - bqsrc('b979e83a', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_WES_20230110_ANV5_202403031555'), - bqsrc('54571a90', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_WES_20230328_ANV5_202403031603'), - bqsrc('5495da63', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_WES_20230110_ANV5_202403031612'), - bqsrc('7275a9bd', 'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_WES_20221114_ANV5_202403031621'), - bqsrc('2c2a7d19', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20230328_ANV5_202403031629'), - bqsrc('edbd02ca', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20221108_ANV5_202403031637'), - bqsrc('225a7340', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20221027_ANV5_202403031645'), - bqsrc('97dadba8', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20240201_ANV5_202402020902'), - bqsrc('6dcb5d39', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20221027_ANV5_202403031701'), - bqsrc('fb4ac7d8', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_WES_20221215_ANV5_202402062059'), - bqsrc('5de241b3', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_WES_20221207_ANV5_202403031709'), - bqsrc('62a84074', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_WES_20221116_ANV5_202403031719'), - bqsrc('7c06247a', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20230612_ANV5_202402062101'), - bqsrc('9042eb4a', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20221027_ANV5_202403031727'), - bqsrc('cb75258b', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20221027_ANV5_202403031735'), - bqsrc('744bc858', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20221027_ANV5_202403031744'), - bqsrc('faff5b2b', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20221110_ANV5_202403031752'), - bqsrc('275b2a46', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_WES_20221205_ANV5_202403031800'), - bqsrc('5a548fd8', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20230328_ANV5_202403031809'), - bqsrc('999301d3', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20230126_ANV5_202402021048'), - bqsrc('eda3f720', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_WES_20221207_ANV5_202403031817'), - bqsrc('d9e55ea0', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_WES_20221207_ANV5_202403031826'), - bqsrc('6a627e94', 
'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_WES_20221215_ANV5_202403031834'), - bqsrc('bfa59a11', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_WES_20240201_ANV5_202403031842'), - bqsrc('f8d5318a', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20230131_ANV5_202402021131'), - bqsrc('4ef1d979', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_WES_20230612_ANV5_202402021139'), - bqsrc('5e00a0df', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20221108_ANV5_202402062105'), - bqsrc('10948836', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_NPU_WES_20221114_ANV5_202403031858'), - bqsrc('0a247e9e', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20230328_ANV5_202403031906'), - bqsrc('154b4ef8', 'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20230328_ANV5_202403031915'), - bqsrc('07b8d88c', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_WES_20221207_ANV5_202402021226'), - bqsrc('1985a01d', 'CCDG_Broad_Spalletta_HMB_NPU_MDS_WES_20221102_ANV5_202403031942'), - bqsrc('ad61c47e', 'CCDG_NHGRI_Broad_ASD_Daly_phs000298_WES_vcf_20230403_ANV5_202304271610', pop), - bqsrc('5e719362', 'CCDG_NYGC_AI_Asthma_Gala2_WGS_20230605_ANV5_202306131248', pop), - bqsrc('2734a0e4', 'CCDG_NYGC_NP_Alz_EFIGA_WGS_20230605_ANV5_202306141705', pop), - bqsrc('710fc60d', 'CCDG_NYGC_NP_Alz_LOAD_WGS_20230605_ANV5_202306131256', pop), - bqsrc('9626b3eb', 'CCDG_NYGC_NP_Alz_WHICAP_WGS_20230605_ANV5_202306131303', pop), - bqsrc('86bb81c0', 'CCDG_NYGC_NP_Autism_ACE2_GRU_MDS_WGS_20230605_ANV5_202403032029'), - bqsrc('85674dce', 'CCDG_NYGC_NP_Autism_AGRE_WGS_20230605_ANV5_202403081651'), - bqsrc('7d1461b2', 'CCDG_NYGC_NP_Autism_SSC_WGS_20230605_ANV5_202403032206'), - bqsrc('25ec7b57', 'CCDG_WASHU_PAGE_20221220_ANV5_202304271544', pop), - bqsrc('15645b8d', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_DS_WGS_20230525_ANV5_202403040118'), - bqsrc('4a0769c7', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202403040126'), - bqsrc('b9222139', 'CMG_BROAD_BRAIN_ENGLE_WES_20240205_ANV5_202402051624', pop), - bqsrc('7e094253', 'CMG_BROAD_BRAIN_SHERR_WGS_20221102_ANV5_202402281543', pop), - bqsrc('c797490f', 'CMG_BROAD_ORPHAN_SCOTT_WGS_20221102_ANV5_202402281552', pop), - bqsrc('0a21cbfd', 'CMG_BaylorHopkins_HMB_IRB_NPU_WES_20221020_ANV5_202402290528'), - bqsrc('d321333c', 'CMG_BaylorHopkins_HMB_NPU_WES_20230525_ANV5_202402290537'), - bqsrc('0a1360b1', 'CMG_Broad_Blood_Gazda_WES_20221117_ANV5_202402290547', pop), - bqsrc('faa71b49', 'CMG_Broad_Blood_Sankaran_WES_20221117_ANV5_202402290555', pop), - bqsrc('abce6387', 'CMG_Broad_Blood_Sankaran_WGS_20221117_ANV5_202402290606', pop), - bqsrc('3dd4d039', 'CMG_Broad_Brain_Gleeson_WES_20221117_ANV5_202304241517', pop), - bqsrc('c361373f', 'CMG_Broad_Brain_Muntoni_WES_20221102_ANV5_202304241527', pop), - bqsrc('fc6ce406', 'CMG_Broad_Brain_NeuroDev_WES_20240112_ANV5_202401152208'), - bqsrc('d7bfafc6', 'CMG_Broad_Brain_Thaker_WES_20221102_ANV5_202304241531', pop), - bqsrc('7e03b5fd', 'CMG_Broad_Brain_Walsh_WES_20230605_ANV5_202310101734', pop), - bqsrc('29812b42', 'CMG_Broad_Eye_Pierce_WES_20221205_ANV5_202304242250', pop), - bqsrc('48134558', 'CMG_Broad_Eye_Pierce_WGS_20221117_ANV5_202304241507', pop), - bqsrc('36ebaa12', 'CMG_Broad_Heart_PCGC_Tristani_WGS_20221025_ANV5_202304211840', pop), - bqsrc('f9826139', 'CMG_Broad_Heart_Seidman_WES_20221117_ANV5_202304241504', pop), - bqsrc('85952af8', 'CMG_Broad_Kidney_Hildebrandt_WES_20230525_ANV5_202305251733', pop), - bqsrc('ee4ae9a1', 'CMG_Broad_Kidney_Hildebrandt_WGS_20221025_ANV5_202304211844', pop), - bqsrc('cf168274', 'CMG_Broad_Kidney_Pollak_WES_20221025_ANV5_202304211846', pop), - bqsrc('4d47ba2c', 
'CMG_Broad_Muscle_Beggs_WGS_20221102_ANV5_202304241533', pop), - bqsrc('82d1271a', 'CMG_Broad_Muscle_Bonnemann_WES_20221117_ANV5_202304241509', pop), - bqsrc('6be3fb25', 'CMG_Broad_Muscle_Bonnemann_WGS_20221117_ANV5_202304241510', pop), - bqsrc('b168eb10', 'CMG_Broad_Muscle_KNC_WES_20221116_ANV5_202304242219', pop), - bqsrc('372244aa', 'CMG_Broad_Muscle_KNC_WGS_20221117_ANV5_202304242221', pop), - bqsrc('c43e7400', 'CMG_Broad_Muscle_Kang_WES_20230525_ANV5_202310101649', pop), - bqsrc('77a6c0aa', 'CMG_Broad_Muscle_Kang_WGS_20221025_ANV5_202304211849', pop), - bqsrc('4153ad1f', 'CMG_Broad_Muscle_Laing_WES_20221208_ANV5_202402291926', pop), - bqsrc('5019143b', 'CMG_Broad_Muscle_Myoseq_WES_20230621_ANV5_202306211852', pop), - bqsrc('27eb651a', 'CMG_Broad_Muscle_Myoseq_WGS_20221208_ANV5_202304271310', pop), - bqsrc('c087af7a', 'CMG_Broad_Muscle_OGrady_WES_20221205_ANV5_202304242252', pop), - bqsrc('db987a2e', 'CMG_Broad_Muscle_Ravenscroft_WES_20221208_ANV5_202304271311', pop), - bqsrc('05df566c', 'CMG_Broad_Muscle_Topf_WES_20221208_ANV5_202304271313', pop), - bqsrc('87d91f06', 'CMG_Broad_Orphan_Chung_WES_20221102_ANV5_202304241534', pop), - bqsrc('25f6b696', 'CMG_Broad_Orphan_Estonia_Ounap_WES_20221117_ANV5_202304241512', pop), - bqsrc('c3b16b41', 'CMG_Broad_Orphan_Estonia_Ounap_WGS_20221205_ANV5_202304242255', pop), - bqsrc('5bbb5a28', 'CMG_Broad_Orphan_Jueppner_WES_20240205_ANV5_202402051640', pop), - bqsrc('32fe2260', 'CMG_Broad_Orphan_Lerner_Ellis_WES_20221102_ANV5_202304241536', pop), - bqsrc('6f9e574e', 'CMG_Broad_Orphan_Manton_WES_20221117_ANV5_202304241513', pop), - bqsrc('53cd689b', 'CMG_Broad_Orphan_Manton_WGS_20221117_ANV5_202304241515', pop), - bqsrc('e7c5babf', 'CMG_Broad_Orphan_Scott_WES_20221025_ANV5_202304241458', pop), - bqsrc('051877f4', 'CMG_Broad_Orphan_Sweetser_WES_20221102_ANV5_202304241539', pop), - bqsrc('555c7706', 'CMG_Broad_Orphan_VCGS_White_WES_20221018_ANV5_202304241522', pop), - bqsrc('3a8f7952', 'CMG_Broad_Orphan_VCGS_White_WGS_20221117_ANV5_202304241523', pop), - bqsrc('b699c5e3', 'CMG_Broad_Rare_RGP_WES_20221102_ANV5_202304241540', pop), - bqsrc('2d5bd095', 'CMG_Broad_Stillbirth_Wilkins_Haug_WES_20221102_ANV5_202304241542', pop), - bqsrc('db7353fb', 'CMG_UWASH_DS_BAV_IRB_PUB_RD_20230419_ANV5_202304201858', pop), - bqsrc('3b8ef67a', 'CMG_UWASH_DS_BDIS_20230418_ANV5_202304201958', pop), - bqsrc('5d27ebfe', 'CMG_UWASH_DS_HFA_20230418_ANV5_202304201932', pop), - bqsrc('9d1a6e0a', 'CMG_UWASH_DS_NBIA_20230418_ANV5_202304201949', pop), - bqsrc('18bd3df4', 'CMG_UWASH_HMB_20230418_ANV5_202402070029', pop), - bqsrc('50484f86', 'CMG_UWASH_HMB_IRB_20230418_ANV5_202304201915', pop), - bqsrc('74bd0964', 'CMG_UWash_DS_EP_20230419_ANV5_202304201906', pop), - bqsrc('6f4155f2', 'CMG_UWash_GRU_20240301_ANV5_202403040330', pop), - bqsrc('6486ae96', 'CMG_UWash_GRU_1_20240113_ANV5_202401141440', pop), - bqsrc('97ec5366', 'CMG_UWash_GRU_IRB_20230418_ANV5_202304201940', pop), - bqsrc('cb305c8e', 'CMG_YALE_DS_MC_20221026_ANV5_202402281611'), - bqsrc('c2897355', 'CMG_Yale_DS_BPEAKD_20240113_ANV5_202401141447'), - bqsrc('4b5667f8', 'CMG_Yale_DS_RD_20240113_ANV5_202401141453'), - bqsrc('9e86cb23', 'CMG_Yale_DS_THAL_IRB_20240113_ANV5_202401141500'), - bqsrc('278252c3', 'CMG_Yale_HMB_IRB_20240113_ANV5_202401141507'), - bqsrc('eea2a20c', 'CMH_GAFK_10X_Genomics_20240304_ANV5_202403071539'), - bqsrc('0e0bf0f8', 'CMH_GAFK_ES_20240301_ANV5_202403040338'), - bqsrc('9935aa3f', 'CMH_GAFK_IlluminaGSA_20240311_ANV5_202403121355'), - bqsrc('d391ce5f', 'CMH_GAFK_IsoSeq_20240113_ANV5_202402062116'), - 
bqsrc('beef6734', 'CMH_GAFK_MGI_20240304_ANV5_202403071559'), - bqsrc('8599b1fb', 'CMH_GAFK_PacBio_methyl_tagged_20240311_ANV5_202403121402'), - bqsrc('94f58e6c', 'CMH_GAFK_SCATAC_20221107_ANV5_202402290954'), - bqsrc('5447de30', 'CMH_GAFK_WGBS_20230327_ANV5_202402062120'), - bqsrc('db73a316', 'CMH_GAFK_WGS_20240113_ANV5_202402062123'), - bqsrc('5227851b', 'CSER_ClinSeq_GRU_20240401_ANV5_202404081541'), - bqsrc('1a706b0c', 'GTEx_Somatic_WGS_20240116_ANV5_202401170147', pop), - bqsrc('8a98bcb4', 'NIMH_Broad_ConvNeuro_McCarroll_Nehme_Levy_CIRM_DS_Village_20240405_ANV5_202404081511'), - bqsrc('c02a5efb', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_VillageData_20230109_ANV5_202402292203'), - bqsrc('817f27aa', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20240206_ANV5_202402081755'), - bqsrc('ddc1d72b', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_Finkel_SMA_DS_WGS_20230109_ANV5_202402292209'), - bqsrc('14f5afa3', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_10XLRGenomes_20221115_ANV5_202310101713', pop), - bqsrc('69e4bc19', 'NIMH_Broad_WGSPD1_McCarroll_COGS_DS_WGS_20240113_ANV5_202401152215'), - bqsrc('da595e23', 'NIMH_Broad_WGSPD1_McCarroll_Escamilla_DS_WGS_20240112_ANV5_202401141541'), - bqsrc('94091a22', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_10XLRGenomes_20230331_ANV5_202310101715', pop), - bqsrc('df20901c', 'NIMH_Broad_WGSPD_1_McCarroll_Braff_DS_WGS_20240304_ANV5_202403071610'), - bqsrc('75e17b99', 'NIMH_CIRM_FCDI_ConvergentNeuro_McCarroll_Eggan_GRU_Arrays_20230109_ANV5_202402292215'), - bqsrc('25790186', 'PAGE_BioMe_GRU_WGS_20221128_ANV5_202403040429', pop), - bqsrc('b371989b', 'PAGE_MEC_GRU_WGS_20230131_ANV5_202403040437', pop), - bqsrc('4a4eec27', 'PAGE_SoL_HMB_WGS_20221220_ANV5_202403040445', pop), - bqsrc('a1f917db', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20221128_ANV5_202403040453', pop), - bqsrc('6264931f', 'PAGE_WHI_HMB_IRB_WGS_20221019_ANV5_202403040500', pop), - bqsrc('f3817357', 'ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome_20221102_ANV5_202403040528'), - bqsrc('23635d1c', 'ccdg_asc_ndd_daly_talkowski_IBIS_asd_exome_20221024_ANV5_202403040537'), - bqsrc('ecf311e7', 'ccdg_asc_ndd_daly_talkowski_TASC_asd_exome_20221117_ANV5_202403040544'), - bqsrc('90923a9d', 'ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome_20231013_ANV5_202403040600'), - bqsrc('2354d65a', 'ccdg_asc_ndd_daly_talkowski_cdcseed_asd_gsa_md_20221024_ANV5_202402291144', pop), - bqsrc('efc0eb70', 'ccdg_asc_ndd_daly_talkowski_gargus_asd_exome_20231013_ANV5_202403040645'), - bqsrc('d1f95953', 'ccdg_asc_ndd_daly_talkowski_gurrieri_asd_exome_20221024_ANV5_202402291153'), - bqsrc('5590427b', 'ccdg_asc_ndd_daly_talkowski_mayo_asd_exome_20221024_ANV5_202402291115'), - bqsrc('3cbe3dd3', 'ccdg_asc_ndd_daly_talkowski_mcpartland_asd_exome_20221116_ANV5_202403040753'), - bqsrc('a245d786', 'ccdg_asc_ndd_daly_talkowski_palotie_asd_exome_20221019_ANV5_202403040815', pop), - bqsrc('104705f5', 'ccdg_asc_ndd_daly_talkowski_passos_bueno_asd_exome_20221108_ANV5_202403040831'), - bqsrc('a07262c0', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_exome__20221025_ANV5_202403040839'), - bqsrc('418e64c1', 'ccdg_asc_ndd_daly_talkowski_persico_asd_exome_20221027_ANV5_202403040854'), - bqsrc('cfe20662', 'ccdg_asc_ndd_daly_talkowski_renieri_asd_exome_20230327_ANV5_202403040909'), - bqsrc('7c668a5c', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_exome_20230324_ANV5_202403040917'), - bqsrc('0b4c3cfb', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_gsa_md_20221025_ANV5_202402291202', pop), - bqsrc('2571477f', 
'ccdg_broad_ai_ibd_daly_burnstein_gsa_20240103_ANV5_202401112154'), - bqsrc('c0abacf6', 'ccdg_broad_ai_ibd_daly_chen_gsa_20240103_ANV5_202401112202'), - bqsrc('c7473b33', 'ccdg_broad_ai_ibd_daly_chen_wes_20240103_ANV5_202403040940'), - bqsrc('ac30439c', 'ccdg_broad_ai_ibd_daly_cho_niddk_gsa_20240103_ANV5_202401112215'), - bqsrc('267ea46f', 'ccdg_broad_ai_ibd_daly_chung_gider_gsa_20240103_ANV5_202401121413'), - bqsrc('c481c20f', 'ccdg_broad_ai_ibd_daly_chung_gider_wes_20240103_ANV5_202403040947'), - bqsrc('938f9e89', 'ccdg_broad_ai_ibd_daly_faubion_share_gsa_20240104_ANV5_202401121427'), - bqsrc('d4b1264d', 'ccdg_broad_ai_ibd_daly_faubion_share_wes_20240104_ANV5_202403040954'), - bqsrc('4d149951', 'ccdg_broad_ai_ibd_daly_franchimont_gsa_20240104_ANV5_202401121441'), - bqsrc('e12ce5bd', 'ccdg_broad_ai_ibd_daly_franchimont_wes_20240104_ANV5_202403041001'), - bqsrc('2c7e5905', 'ccdg_broad_ai_ibd_daly_hyams_protect_gsa_20240311_ANV5_202403121623'), - bqsrc('f5463526', 'ccdg_broad_ai_ibd_daly_kastner_fmf_gsa_20240104_ANV5_202401121503'), - bqsrc('51367192', 'ccdg_broad_ai_ibd_daly_kastner_fmf_nhgri_wes_20240104_ANV5_202401152230'), - bqsrc('7268c3a0', 'ccdg_broad_ai_ibd_daly_kupcinskas_gsa_20240311_ANV5_202403121627'), - bqsrc('51449a60', 'ccdg_broad_ai_ibd_daly_lira_share_wes_20240104_ANV5_202403041035'), - bqsrc('ee1b3121', 'ccdg_broad_ai_ibd_daly_louis_gsa_20240311_ANV5_202403121633'), - bqsrc('083044ec', 'ccdg_broad_ai_ibd_daly_newberry_share_gsa_20240105_ANV5_202401121611'), - bqsrc('10ae29e5', 'ccdg_broad_ai_ibd_daly_newberry_share_wes_20240105_ANV5_202403041117'), - bqsrc('a240ffda', 'ccdg_broad_ai_ibd_daly_niddk_cho_wes_20240105_ANV5_202403041125'), - bqsrc('929acb2a', 'ccdg_broad_ai_ibd_daly_rioux_bitton_igenomed_wes_20240105_ANV5_202401121701'), - bqsrc('fa70ba86', 'ccdg_broad_ai_ibd_daly_rioux_genizon_wes_20240311_ANV5_202403121426'), - bqsrc('6e9030de', 'ccdg_broad_ai_ibd_daly_rioux_igenomed_gsa_20240105_ANV5_202401121709'), - bqsrc('c9265cf7', 'ccdg_broad_ai_ibd_daly_rioux_niddk_gsa_20240108_ANV5_202401121716'), - bqsrc('fe283248', 'ccdg_broad_ai_ibd_daly_rioux_niddk_wes_20240108_ANV5_202403041140'), - bqsrc('3ca098f3', 'ccdg_broad_ai_ibd_daly_sands_msccr_gsa_20240108_ANV5_202401121730'), - bqsrc('fd47ae7f', 'ccdg_broad_ai_ibd_daly_sands_msccr_wes_20240108_ANV5_202403041148'), - bqsrc('4300fbc6', 'ccdg_broad_ai_ibd_daly_silverberg_niddk_gsa_20240108_ANV5_202401121745'), - bqsrc('14285871', 'ccdg_broad_ai_ibd_daly_stampfer_nhs_gsa_20240311_ANV5_202403121637'), - bqsrc('d69ac752', 'ccdg_broad_ai_ibd_daly_stampfer_wes_20240108_ANV5_202403041155'), - bqsrc('268dabf8', 'ccdg_broad_ai_ibd_daly_vermeire_gsa_20240113_ANV5_202402062145'), - bqsrc('636bc565', 'ccdg_broad_ai_ibd_daly_vermeire_wes_20240108_ANV5_202403041203'), - bqsrc('7cc92556', 'ccdg_broad_ai_ibd_daly_xavier_prism_gsa_20240108_ANV5_202402062149'), - bqsrc('6b12cac1', 'ccdg_broad_ai_ibd_daly_xavier_prism_wes_20240108_ANV5_202403041214'), - bqsrc('5d4e150c', 'ccdg_broad_ai_ibd_daly_xavier_share_gsa_20240108_ANV5_202401121819'), - bqsrc('e30e7797', 'ccdg_broad_ai_ibd_daly_xavier_share_wes_20240108_ANV5_202403041224'), - bqsrc('597e5f25', 'ccdg_broad_ai_ibd_niddk_daly_duerr_wes_20240112_ANV5_202403041241'), - bqsrc('2f8b185b', 'ccdg_broad_ai_ibd_niddk_daly_silverberg_wes_20240112_ANV5_202403041250'), - bqsrc('7a0883a4', 'ccdg_broad_cvd_af_pegasus_hmb_20221025_ANV5_202403030736', pop), - bqsrc('f62c5ebd', 'ccdg_broad_cvd_eocad_promis_wgs_20221213_ANV5_202403030935', pop), - bqsrc('9d116a5c', 
'ccdg_broad_mi_atvb_ds_cvd_wes_20221025_ANV5_202403031035', pop), - bqsrc('6c0a5f0d', 'ccdg_broad_mi_univutah_ds_cvd_wes_20221026_ANV5_202403031059'), - bqsrc('235663ab', 'ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes_20221101_ANV5_202403031924'), - bqsrc('81cf50b1', 'ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes_20221026_ANV5_202403031933'), - bqsrc('e6801146', 'ccdg_nygc_np_autism_hmca_wgs_20221024_ANV5_202403032115'), - bqsrc('64b26798', 'ccdg_washu_ai_t1d_t1dgc_wgs_20221031_ANV5_202403032311'), - bqsrc('e3065356', 'ccdg_washu_cvd_eocad_biome_wgs_20221024_ANV5_202304211601', pop), - bqsrc('01e3396c', 'ccdg_washu_cvd_eocad_cleveland_wgs_20221024_ANV5_202403040008'), - bqsrc('5e62ca4f', 'ccdg_washu_cvd_eocad_emerge_wgs_20221024_ANV5_202403040026'), - bqsrc('a0d77559', 'ccdg_washu_cvd_eocad_emory_wgs_20221024_ANV5_202403040034'), - bqsrc('33e3428b', 'ccdg_washu_cvd_np_ai_controls_vccontrols_wgs_20221024_ANV5_202403032319', pop), - bqsrc('17c5f983', 'cmg_broad_brain_engle_wgs_20221202_ANV5_202402290614', pop), - bqsrc('1cb73890', 'cmg_broad_heart_ware_wes_20221215_ANV5_202304242145', pop), - bqsrc('833ff0a3', 'eMERGE_GRU_IRB_NPU_eMERGEseq_20230130_ANV5_202304271614', pop), - bqsrc('baf040af', 'eMERGE_GRU_IRB_PUB_NPU_eMERGEseq_20230130_ANV5_202304271616', pop), - bqsrc('270b3b62', 'eMERGE_GRU_IRB_eMERGEseq_20230130_ANV5_202304271613', pop), - bqsrc('c13efbe9', 'eMERGE_GRU_NPU_eMERGEseq_20230130_ANV5_202304271617', pop), - bqsrc('34f8138d', 'eMERGE_GRU_eMERGEseq_20230130_ANV5_202304271612', pop), - bqsrc('90b7b6e8', 'eMERGE_HMB_GSO_eMERGEseq_20230130_ANV5_202304271621', pop), - bqsrc('6e6dca92', 'eMERGE_HMB_IRB_PUB_eMERGEseq_20230130_ANV5_202304271622', pop), - bqsrc('1ddf2a8e', 'eMERGE_HMB_NPU_eMERGEseq_20230130_ANV5_202304271624', pop), - bqsrc('dba97a65', 'eMERGE_HMB_eMERGEseq_20230130_ANV5_202304271619', pop), - bqsrc('51aa9a22', 'eMERGE_PGRNseq_20230118_ANV5_202304241853', pop), - bqsrc('ce8c469f', 'eMERGE_PRS_Arrays_20221220_ANV5_202304271346', pop), - bqsrc('bf91a039', 'nhgri_broad_ibd_daly_winter_wes_20240112_ANV5_202403041315'), -])) - -anvil7_sources = mkdict(anvil6_sources, 256, mkdelta([ - bqsrc('c9e438dc', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20221026_ANV5_202406261957'), - bqsrc('90a1d452', 'GREGoR_R01_GRU_20240208_ANV5_202407011515'), - bqsrc('c27c13db', 'GREGoR_R01_HMB_20240208_ANV5_202407011529'), - bqsrc('3594cc06', 'HPRC_20240401_ANV5_202406261913'), - bqsrc('49f55ff6', 'NIMH_Broad_WGSPD1_McCarroll_Light_DS_WGS_20240625_ANV5_202406262032'), - bqsrc('54040f7f', 'T2T_CHRY_20240301_ANV5_202406271432'), - bqsrc('5048eadd', 'ccdg_broad_ai_ibd_daly_brant_burnstein_utsw_wes_20240627_ANV5_202406271535'), - bqsrc('5d003f44', 'ccdg_broad_daly_igsr_1kg_twist_wes_20240625_ANV5_202406261904') -])) - -anvil8_sources = mkdict(anvil7_sources, 254, mkdelta([ - bqsrc('6fd2f543', '1000G_PRIMED_data_model_20240410_ANV5_202409251724'), - bqsrc('13858a9f', '1000G_high_coverage_2019_20230517_ANV5_202409231755'), - bqsrc('f954ce44', 'African_American_Seq_HGV_20230727_ANV5_202409251735'), - bqsrc('1c288bc8', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202409231808'), - bqsrc('69a5161a', 'CCDG_Broad_AI_IBD_Brant_DS_IBD_WGS_20240113_ANV5_202409302325'), - bqsrc('87ae3152', 'CCDG_Broad_AI_IBD_Brant_HMB_WGS_20240113_ANV5_202410011417'), - bqsrc('0de3f19d', 'CCDG_Broad_AI_IBD_Cho_WGS_20240113_ANV5_202409261925'), - bqsrc('183d3f73', 'CCDG_Broad_AI_IBD_Kugathasan_WGS_20240113_ANV5_202409261935'), - bqsrc('f82d1472', 'CCDG_Broad_AI_IBD_McGovern_WGS_20240113_ANV5_202409262009'), - 
bqsrc('a173fc34', 'CCDG_Broad_AI_IBD_Newberry_WGS_20240113_ANV5_202409262020'), - bqsrc('b0ce674b', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_Arrays_20230517_ANV5_202410011428'), - bqsrc('f88712d7', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_WES_20230621_ANV5_202409262029'), - bqsrc('948779f6', 'CCDG_Broad_CVD_EOCAD_TaiChi_WGS_20221026_ANV5_202409251741'), - bqsrc('ed6900d9', 'CCDG_Broad_CVD_EOCAD_VIRGO_WGS_20221024_ANV5_202409251751'), - bqsrc('38d33c51', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202410011452'), - bqsrc('a6afe2df', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20221101_ANV5_202409262047'), - bqsrc('9bf401b1', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20221101_ANV5_202409262056'), - bqsrc('573b1bec', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20221101_ANV5_202409262105'), - bqsrc('eeaf2d1a', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202410011503'), - bqsrc('3b3be681', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20221026_ANV5_202409262116'), - bqsrc('b8d6b994', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202410011513'), - bqsrc('49cabb98', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202410011523'), - bqsrc('afcff545', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20221108_ANV5_202410011533'), - bqsrc('2df70f51', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20221027_ANV5_202409262125'), - bqsrc('ab5c8456', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20240201_ANV5_202410011544'), - bqsrc('a1b27d24', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20240201_ANV5_202410011554'), - bqsrc('3f080a87', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20230328_ANV5_202409262134'), - bqsrc('465bfaac', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_WES_20221102_ANV5_202409262144'), - bqsrc('ccb8a4b7', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_WES_20230328_ANV5_202409301706'), - bqsrc('015bb538', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20221108_ANV5_202409262231'), - bqsrc('435613ab', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_WES_20230328_ANV5_202409262240'), - bqsrc('2f73c7c1', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20240201_ANV5_202409262249'), - bqsrc('3841aefa', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_WES_20230328_ANV5_202409262258'), - bqsrc('99470817', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_WES_20221102_ANV5_202409262308'), - bqsrc('46e142ab', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20221108_ANV5_202409262318'), - bqsrc('2648f51f', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_WES_20230328_ANV5_202409262327'), - bqsrc('b749c687', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20230328_ANV5_202409262337'), - bqsrc('5d23c09d', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_WES_20221114_ANV5_202409262346'), - bqsrc('548a0b21', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_WES_20230621_ANV5_202409262355'), - bqsrc('7a345902', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20221026_ANV5_202409270005'), - bqsrc('9f144aec', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20221026_ANV5_202409251801'), - bqsrc('813188f4', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20221027_ANV5_202409301252'), - bqsrc('dca2300f', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20221108_ANV5_202409301302'), - bqsrc('6f0e0649', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_WES_20221122_ANV5_202409301311'), - bqsrc('6d3907bb', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_WES_20230328_ANV5_202409301321'), - bqsrc('636e501f', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_WES_20230110_ANV5_202409231955'), - bqsrc('5271045a', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_WES_20221114_ANV5_202409301332'), - 
bqsrc('941ca2d1', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_WES_20230328_ANV5_202409301342'), - bqsrc('d6a4eda2', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_WES_20230223_ANV5_202409301352'), - bqsrc('174f3d1e', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20221108_ANV5_202409301402'), - bqsrc('6cb9a7a7', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20221027_ANV5_202409301413'), - bqsrc('87e7f1b9', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20221108_ANV5_202409301423'), - bqsrc('cbfb79d8', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_WES_20221114_ANV5_202409301433'), - bqsrc('9bbfe25c', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20221220_ANV5_202409301444'), - bqsrc('d4950205', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20221027_ANV5_202410011604'), - bqsrc('12cc5629', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_WES_20230110_ANV5_202409301453'), - bqsrc('b119a402', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_WES_20230328_ANV5_202409301503'), - bqsrc('7354f3d2', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_WES_20230110_ANV5_202409301514'), - bqsrc('00271874', 'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_WES_20221114_ANV5_202409301526'), - bqsrc('d0749ece', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20230328_ANV5_202409301537'), - bqsrc('52be6def', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20221108_ANV5_202409301547'), - bqsrc('8629a23a', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20221027_ANV5_202409301557'), - bqsrc('c6b049b2', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20240201_ANV5_202410011615'), - bqsrc('1b2e88a4', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20221027_ANV5_202409301607'), - bqsrc('798646a1', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_WES_20221215_ANV5_202410011625'), - bqsrc('83ec96c3', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_WES_20221207_ANV5_202409301617'), - bqsrc('1e1218b7', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_WES_20221116_ANV5_202409301627'), - bqsrc('e421074a', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20230612_ANV5_202410011634'), - bqsrc('a7b2b8bc', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20221027_ANV5_202409232006'), - bqsrc('9dc6e713', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20221027_ANV5_202409232016'), - bqsrc('69531ad0', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20221027_ANV5_202409232025'), - bqsrc('a1fbb513', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20221110_ANV5_202409301638'), - bqsrc('797b067f', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_WES_20221205_ANV5_202409301648'), - bqsrc('acd1fc6d', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20230328_ANV5_202409301657'), - bqsrc('e44dfa03', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20230126_ANV5_202410011646'), - bqsrc('5521223c', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_WES_20221207_ANV5_202409302000'), - bqsrc('c5ca49db', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_WES_20221207_ANV5_202409302009'), - bqsrc('8a9bc88a', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_WES_20221215_ANV5_202409302018'), - bqsrc('302dbf9e', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_WES_20240201_ANV5_202409302028'), - bqsrc('2d22bf8b', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20230131_ANV5_202410011657'), - bqsrc('23486b33', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_WES_20230612_ANV5_202410011708'), - bqsrc('76142d3b', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20221108_ANV5_202410011719'), - bqsrc('3e1c2a3e', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_NPU_WES_20221114_ANV5_202409302037'), - bqsrc('fd6ee483', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20230328_ANV5_202409302046'), - bqsrc('416b4095', 
'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20230328_ANV5_202409302055'), - bqsrc('a8099f9d', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_WES_20221207_ANV5_202410011732'), - bqsrc('7529b6b1', 'CCDG_Broad_Spalletta_HMB_NPU_MDS_WES_20221102_ANV5_202409232034'), - bqsrc('27e59539', 'CCDG_NYGC_NP_Autism_ACE2_DS_MDS_WGS_20230605_ANV5_202409302125'), - bqsrc('37c78fc4', 'CCDG_NYGC_NP_Autism_ACE2_GRU_MDS_WGS_20230605_ANV5_202409232043'), - bqsrc('e3ebc7f0', 'CCDG_NYGC_NP_Autism_AGRE_WGS_20230605_ANV5_202410011742'), - bqsrc('1c1b8f44', 'CCDG_NYGC_NP_Autism_CAG_DS_WGS_20230605_ANV5_202409232053'), - bqsrc('f32ef49b', 'CCDG_NYGC_NP_Autism_HFA_DS_WGS_20230605_ANV5_202409232102'), - bqsrc('82575f4a', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_DS_WGS_20221103_ANV5_202409232112'), - bqsrc('25858a7b', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_GRU_WGS_20221103_ANV5_202409241351'), - bqsrc('8302ff1f', 'CCDG_NYGC_NP_Autism_SAGE_WGS_20230605_ANV5_202409302144'), - bqsrc('443d8d20', 'CCDG_NYGC_NP_Autism_SSC_WGS_20230605_ANV5_202409302154'), - bqsrc('660280f8', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_DS_WGS_20230525_ANV5_202409302255'), - bqsrc('4a0769c7', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202403040126', pop), - bqsrc('c2a2b724', 'CMG_YALE_DS_MC_20221026_ANV5_202409302315'), - bqsrc('f961f617', 'CMG_YALE_DS_RARED_20221020_ANV5_202409251714'), - bqsrc('5d222190', 'CMG_Yale_DS_BPEAKD_20240113_ANV5_202410011754'), - bqsrc('fe056168', 'CMG_Yale_DS_RD_20240113_ANV5_202410011804'), - bqsrc('06182245', 'CMG_Yale_DS_THAL_IRB_20240113_ANV5_202410011814'), - bqsrc('ad307392', 'CMG_Yale_GRU_20221020_ANV5_202402281628', pop), - bqsrc('35779fe0', 'CMG_Yale_HMB_20221020_ANV5_202410011825'), - bqsrc('cebe6de0', 'CMG_Yale_HMB_GSO_20221020_ANV5_202410011834'), - bqsrc('5c7f0d2a', 'CMG_Yale_HMB_IRB_20240113_ANV5_202410011846'), - bqsrc('abdbf318', 'CMH_GAFK_10X_Genomics_20240304_ANV5_202409251809'), - bqsrc('a3097787', 'CMH_GAFK_ES_20240301_ANV5_202409251815'), - bqsrc('e4a5f270', 'CMH_GAFK_GS_linked_read_20221107_ANV5_202409251830'), - bqsrc('0eaa72dc', 'CMH_GAFK_GS_long_read_20240301_ANV5_202409251840'), - bqsrc('54e0207f', 'CMH_GAFK_IlluminaGSA_20240311_ANV5_202409231642'), - bqsrc('b69c3ccd', 'CMH_GAFK_IsoSeq_20240113_ANV5_202409251851'), - bqsrc('aa13412e', 'CMH_GAFK_MGI_20240304_ANV5_202409251952'), - bqsrc('61c255d4', 'CMH_GAFK_PacBio_methyl_tagged_20240311_ANV5_202409231650'), - bqsrc('e6bc59ce', 'CMH_GAFK_WGS_20240113_ANV5_202409252004'), - bqsrc('2b8418a9', 'CSER_CHARM_GRU_20240301_ANV5_202410021502'), - bqsrc('3c4e3e42', 'CSER_NYCKIDSEQ_GRU_20240113_ANV5_202409252031'), - bqsrc('bfb01f90', 'CSER_NYCKIDSEQ_HMB_20240113_ANV5_202409252040'), - bqsrc('9fb7b90a', 'CSER_P3EGS_GRU_20230727_ANV5_202409252049'), - bqsrc('0e626b88', 'CSER_SouthSeq_GRU_20221208_ANV5_202410021513'), - bqsrc('c60190e0', 'GREGoR_R01_GRU_20240208_ANV5_202408141711'), - bqsrc('9a665ca1', 'GREGoR_R01_HMB_20240208_ANV5_202408141715'), - bqsrc('14f8d940', 'GTEx_BCM_GRU_CoRSIVs_20240116_ANV5_202409252058'), - bqsrc('44ba7ece', 'GTEx_V8_hg38_20240116_ANV5_202409251632'), - bqsrc('5fd8c286', 'GTEx_public_data_20240117_ANV5_202409252106'), - bqsrc('c1e66f15', 'HPRC_20240401_ANV5_202409251654'), - bqsrc('5dff1da7', 'NIA_CARD_Coriell_Cell_Lines_Open_20230727_ANV5_202410021438'), - bqsrc('65212bf5', 'NIA_CARD_LR_WGS_NABEC_GRU_20230727_ANV5_202410021449'), - bqsrc('9ffeaa82', 'NIMH_Broad_ConvNeuro_McCarroll_Nehme_Levy_CIRM_DS_Village_20240405_ANV5_202409201403'), - bqsrc('7f246585', 
'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20240206_ANV5_202409252122'), - bqsrc('62c7b77b', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_Finkel_SMA_DS_WGS_20230109_ANV5_202409252132'), - bqsrc('0050666b', 'NIMH_Broad_WGSPD1_McCarroll_COGS_DS_WGS_20240113_ANV5_202409252139'), - bqsrc('591f1c1e', 'NIMH_Broad_WGSPD1_McCarroll_Escamilla_DS_WGS_20240112_ANV5_202410011855'), - bqsrc('59c59a28', 'NIMH_Broad_WGSPD1_McCarroll_Light_DS_WGS_20240625_ANV5_202409252147'), - bqsrc('754928da', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_WGS_20240112_ANV5_202410011904'), - bqsrc('cce0dbdc', 'NIMH_Broad_WGSPD_1_McCarroll_Braff_DS_WGS_20240304_ANV5_202409252156'), - bqsrc('2bf3361f', 'NIMH_CIRM_FCDI_ConvergentNeuro_McCarroll_Eggan_GRU_Arrays_20230109_ANV5_202409252204'), - bqsrc('95f60999', 'T2T_20230714_ANV5_202409252214'), - bqsrc('a20ffbf1', 'T2T_CHRY_20240301_ANV5_202409252300'), - bqsrc('c4be3462', 'ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome_20221102_ANV5_202409241400'), - bqsrc('842d0cc0', 'ccdg_asc_ndd_daly_talkowski_IBIS_asd_exome_20221024_ANV5_202409241411'), - bqsrc('40c6c06f', 'ccdg_asc_ndd_daly_talkowski_TASC_asd_exome_20221117_ANV5_202409241420'), - bqsrc('56136832', 'ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome_20231013_ANV5_202409241432'), - bqsrc('dd505610', 'ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome_20221108_ANV5_202409241442'), - bqsrc('b2e1bb0d', 'ccdg_asc_ndd_daly_talkowski_brusco_asd_exome_20230327_ANV5_202409241451'), - bqsrc('5382a45d', 'ccdg_asc_ndd_daly_talkowski_chung_asd_exome_20221107_ANV5_202409241501'), - bqsrc('ffb6d106', 'ccdg_asc_ndd_daly_talkowski_control_NIMH_asd_exome_20221201_ANV5_202409241511'), - bqsrc('0de6bd0f', 'ccdg_asc_ndd_daly_talkowski_gargus_asd_exome_20231013_ANV5_202409241521'), - bqsrc('eced0cb1', 'ccdg_asc_ndd_daly_talkowski_goethe_asd_exome_20221107_ANV5_202409241530'), - bqsrc('849f26ad', 'ccdg_asc_ndd_daly_talkowski_gurrieri_asd_exome_20221024_ANV5_202409252359'), - bqsrc('ef7e53e2', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_exome_20221107_ANV5_202409241541'), - bqsrc('4155fb6c', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_wgs_20221107_ANV5_202409241551'), - bqsrc('7ef162bf', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_exome_20221108_ANV5_202409241559'), - bqsrc('0aba3c39', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_wgs_20221109_ANV5_202409241611'), - bqsrc('49fb2096', 'ccdg_asc_ndd_daly_talkowski_mayo_asd_exome_20221024_ANV5_202409252350'), - bqsrc('ac3a764d', 'ccdg_asc_ndd_daly_talkowski_mcpartland_asd_exome_20221116_ANV5_202409241620'), - bqsrc('3829964a', 'ccdg_asc_ndd_daly_talkowski_menashe_asd_exome_20221108_ANV5_202409241630'), - bqsrc('903678e2', 'ccdg_asc_ndd_daly_talkowski_parellada_asd_exome_20221108_ANV5_202409241638'), - bqsrc('9d656629', 'ccdg_asc_ndd_daly_talkowski_passos_bueno_asd_exome_20221108_ANV5_202409241649'), - bqsrc('faa3347c', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_exome__20221025_ANV5_202409241658'), - bqsrc('2ea97771', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_wgs_20221027_ANV5_202409241814'), - bqsrc('88aae06e', 'ccdg_asc_ndd_daly_talkowski_persico_asd_exome_20221027_ANV5_202409241823'), - bqsrc('08215fa6', 'ccdg_asc_ndd_daly_talkowski_renieri_asd_exome_20230327_ANV5_202409241833'), - bqsrc('6f17c190', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_exome_20230324_ANV5_202409241842'), - bqsrc('333a3617', 'ccdg_asc_ndd_daly_talkowski_weiss_asd_exome_20221108_ANV5_202409241851'), - bqsrc('1e362e1d', 'ccdg_broad_ai_ibd_daly_brant_burnstein_utsw_wes_20240627_ANV5_202409260008'), - bqsrc('8918d261', 
'ccdg_broad_ai_ibd_daly_brant_niddk_gsa_20240103_ANV5_202409260018'), - bqsrc('f5fdd89a', 'ccdg_broad_ai_ibd_daly_burnstein_gsa_20240103_ANV5_202409261428'), - bqsrc('91d1ab8e', 'ccdg_broad_ai_ibd_daly_chen_gsa_20240103_ANV5_202409261437'), - bqsrc('acab4546', 'ccdg_broad_ai_ibd_daly_chen_wes_20240103_ANV5_202409241859'), - bqsrc('45c2ba3b', 'ccdg_broad_ai_ibd_daly_cho_niddk_gsa_20240103_ANV5_202409261446'), - bqsrc('185d52bd', 'ccdg_broad_ai_ibd_daly_chung_gider_gsa_20240103_ANV5_202409261456'), - bqsrc('01e1177c', 'ccdg_broad_ai_ibd_daly_chung_gider_wes_20240103_ANV5_202409241909'), - bqsrc('94a46beb', 'ccdg_broad_ai_ibd_daly_faubion_share_gsa_20240104_ANV5_202409261505'), - bqsrc('b2456308', 'ccdg_broad_ai_ibd_daly_faubion_share_wes_20240104_ANV5_202409241919'), - bqsrc('dde3655d', 'ccdg_broad_ai_ibd_daly_franchimont_gsa_20240104_ANV5_202409261515'), - bqsrc('73a081bb', 'ccdg_broad_ai_ibd_daly_franchimont_wes_20240104_ANV5_202409241929'), - bqsrc('f4d731a0', 'ccdg_broad_ai_ibd_daly_hyams_protect_gsa_20240311_ANV5_202409231656'), - bqsrc('21868172', 'ccdg_broad_ai_ibd_daly_hyams_protect_wes_20240104_ANV5_202409241940'), - bqsrc('a6a40cd9', 'ccdg_broad_ai_ibd_daly_kastner_fmf_gsa_20240104_ANV5_202409261525'), - bqsrc('4b41d063', 'ccdg_broad_ai_ibd_daly_kastner_fmf_nhgri_wes_20240104_ANV5_202409261534'), - bqsrc('59084d62', 'ccdg_broad_ai_ibd_daly_kupcinskas_gsa_20240311_ANV5_202409231708'), - bqsrc('e56d71fd', 'ccdg_broad_ai_ibd_daly_kupcinskas_wes_20240104_ANV5_202409241949'), - bqsrc('98f1acc9', 'ccdg_broad_ai_ibd_daly_lewis_sparc_gsa_20240104_ANV5_202409261543'), - bqsrc('5b2de91f', 'ccdg_broad_ai_ibd_daly_lira_share_wes_20240104_ANV5_202409242001'), - bqsrc('144a86c1', 'ccdg_broad_ai_ibd_daly_louis_gsa_20240311_ANV5_202409231721'), - bqsrc('5dd9d83b', 'ccdg_broad_ai_ibd_daly_louis_wes_20240104_ANV5_202409242011'), - bqsrc('f11a2ad0', 'ccdg_broad_ai_ibd_daly_mccauley_wes_20240104_ANV5_202409242021'), - bqsrc('2b74b327', 'ccdg_broad_ai_ibd_daly_mcgovern_niddk_wes_20240104_ANV5_202409242032'), - bqsrc('91cac0e9', 'ccdg_broad_ai_ibd_daly_moayyedi_imagine_gsa_20240105_ANV5_202409261605'), - bqsrc('1cde4183', 'ccdg_broad_ai_ibd_daly_newberry_share_gsa_20240105_ANV5_202409261616'), - bqsrc('7a369c1e', 'ccdg_broad_ai_ibd_daly_newberry_share_wes_20240105_ANV5_202409242046'), - bqsrc('db6e29bb', 'ccdg_broad_ai_ibd_daly_niddk_cho_wes_20240105_ANV5_202409242056'), - bqsrc('820a5c30', 'ccdg_broad_ai_ibd_daly_pekow_share_gsa_20240105_ANV5_202409261627'), - bqsrc('69911b0d', 'ccdg_broad_ai_ibd_daly_pekow_share_wes_20240105_ANV5_202409242107'), - bqsrc('77ec2fe1', 'ccdg_broad_ai_ibd_daly_rioux_bitton_igenomed_wes_20240105_ANV5_202409261636'), - bqsrc('08a64b4f', 'ccdg_broad_ai_ibd_daly_rioux_genizon_wes_20240311_ANV5_202409231732'), - bqsrc('13a5c9e0', 'ccdg_broad_ai_ibd_daly_rioux_igenomed_gsa_20240105_ANV5_202409261645'), - bqsrc('d8202699', 'ccdg_broad_ai_ibd_daly_rioux_niddk_gsa_20240108_ANV5_202409261653'), - bqsrc('67df5d1d', 'ccdg_broad_ai_ibd_daly_rioux_niddk_wes_20240108_ANV5_202409242116'), - bqsrc('f0ead8f9', 'ccdg_broad_ai_ibd_daly_sands_msccr_gsa_20240108_ANV5_202409261705'), - bqsrc('f890c249', 'ccdg_broad_ai_ibd_daly_sands_msccr_wes_20240108_ANV5_202409242127'), - bqsrc('065c8f18', 'ccdg_broad_ai_ibd_daly_silverberg_niddk_gsa_20240108_ANV5_202409261715'), - bqsrc('401b1cf7', 'ccdg_broad_ai_ibd_daly_stampfer_nhs_gsa_20240311_ANV5_202409231743'), - bqsrc('034aecb5', 'ccdg_broad_ai_ibd_daly_stampfer_wes_20240108_ANV5_202409251505'), - bqsrc('f3933ea2', 
'ccdg_broad_ai_ibd_daly_vermeire_gsa_20240113_ANV5_202409261726'), - bqsrc('dbda69e3', 'ccdg_broad_ai_ibd_daly_vermeire_wes_20240108_ANV5_202409251516'), - bqsrc('d1d3e261', 'ccdg_broad_ai_ibd_daly_xavier_prism_gsa_20240108_ANV5_202409261740'), - bqsrc('1918c027', 'ccdg_broad_ai_ibd_daly_xavier_prism_wes_20240108_ANV5_202409251531'), - bqsrc('900597b7', 'ccdg_broad_ai_ibd_daly_xavier_share_gsa_20240108_ANV5_202409261751'), - bqsrc('e9e9f233', 'ccdg_broad_ai_ibd_daly_xavier_share_wes_20240108_ANV5_202409251548'), - bqsrc('851fd8f7', 'ccdg_broad_ai_ibd_niddk_daly_duerr_wes_20240112_ANV5_202409251558'), - bqsrc('13a30243', 'ccdg_broad_ai_ibd_niddk_daly_silverberg_wes_20240112_ANV5_202409251610'), - bqsrc('eceddedc', 'ccdg_broad_daly_igsr_1kg_twist_wes_20240625_ANV5_202409231828'), - bqsrc('96417715', 'ccdg_broad_mi_univutah_ds_cvd_wes_20221026_ANV5_202409231943'), - bqsrc('ff012258', 'ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes_20221101_ANV5_202409302105'), - bqsrc('61b6b42b', 'ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes_20221026_ANV5_202409302116'), - bqsrc('c27e3cda', 'ccdg_nygc_np_autism_hmca_wgs_20221024_ANV5_202409302135'), - bqsrc('f4073027', 'ccdg_nygc_np_autism_tasc_wgs_20221024_ANV5_202409302207'), - bqsrc('6f9855f6', 'ccdg_washu_ai_t1d_t1dgc_wgs_20221031_ANV5_202409302216'), - bqsrc('95a5e448', 'ccdg_washu_cvd_eocad_cleveland_wgs_20221024_ANV5_202409302226'), - bqsrc('9faffbb3', 'ccdg_washu_cvd_eocad_emerge_wgs_20221024_ANV5_202409302235'), - bqsrc('5d6d4dc4', 'ccdg_washu_cvd_eocad_emory_wgs_20221024_ANV5_202409302245'), - bqsrc('db95c1cc', 'nhgri_broad_ibd_daly_winter_wes_20240112_ANV5_202409251622'), -])) - -anvil9_sources = mkdict(anvil8_sources, 280, mkdelta([ - bqsrc('b555b2f5', '1000G_PRIMED_data_model_20240410_ANV5_202502211647'), - bqsrc('265cde27', 'ALSCompute_Collection_GRU_20231016_ANV5_202410310107'), - bqsrc('853bbf15', 'ALSCompute_Collection_HMB_20241018_ANV5_202410232001'), - bqsrc('5216abda', 'CCDG_Broad_AI_IBD_McCauley_WGS_20240114_ANV5_202502052138'), - bqsrc('078d7ad8', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_Arrays_20250206_ANV5_202502201726'), - bqsrc('ea743ee1', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_HMB_WES_20250206_ANV5_202502201731'), - bqsrc('088ebd14', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_Arrays_20250206_ANV5_202502201736'), - bqsrc('147aff13', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_WES_20250206_ANV5_202502201740'), - bqsrc('897ad869', 'CCDG_Broad_CVD_AF_Marcus_UCSF_Arrays_20250206_ANV5_202502201745'), - bqsrc('ac2033df', 'CCDG_Broad_CVD_AF_Marcus_UCSF_HMB_WES_20250206_ANV5_202502201749'), - bqsrc('190647f7', 'CCDG_Broad_CVD_AF_VAFAR_Arrays_20250219_ANV5_202502201753'), - bqsrc('9694bb1c', 'CCDG_Broad_MI_ATVB_DS_CVD_WES_20250206_ANV5_202502201757'), - bqsrc('6a7bfee9', 'CCDG_Broad_MI_BRAVE_GRU_WES_20250206_ANV5_202502201801'), - bqsrc('12048ad1', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202502201806'), - bqsrc('f101bba7', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202502201810'), - bqsrc('1f264a75', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202502201814'), - bqsrc('6d9ad64a', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202502201818'), - bqsrc('724f8958', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20240201_ANV5_202502201822'), - bqsrc('1d96b10b', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20240201_ANV5_202502201826'), - bqsrc('aa7f9c50', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20240201_ANV5_202502201830'), - bqsrc('aef3d233', 
'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20240201_ANV5_202502201834'), - bqsrc('95c60b51', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20230126_ANV5_202502201839'), - bqsrc('b5486758', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20230131_ANV5_202502201843'), - bqsrc('0ef2f4b9', 'CMG_UWASH_DS_BAV_IRB_PUB_RD_20250206_ANV5_202502201846'), - bqsrc('e85fc320', 'CMG_UWASH_DS_BDIS_20250206_ANV5_202502201850'), - bqsrc('a5e0fb2a', 'CMG_UWASH_DS_HFA_20250206_ANV5_202502201859'), - bqsrc('28813dc5', 'CMG_UWASH_DS_NBIA_20250206_ANV5_202502201903'), - bqsrc('418e6f5b', 'CMG_UWASH_HMB_20250219_ANV5_202502201916'), - bqsrc('11e44295', 'CMG_UWASH_HMB_IRB_20250219_ANV5_202502201921'), - bqsrc('2c303369', 'CMG_UWash_DS_CHDEF_20250224_ANV5_202502241753'), - bqsrc('0e0af0a8', 'CMG_UWash_DS_EP_20250219_ANV5_202502201854'), - bqsrc('766c47dd', 'CMG_UWash_GRU_20250224_ANV5_202502241706'), - bqsrc('aa22e87e', 'CMG_UWash_GRU_IRB_20250224_ANV5_202502241723'), - bqsrc('7c44dbc8', 'CMH_GAFK_ES_20240301_ANV5_202502201925'), - bqsrc('97f2fa00', 'CMH_GAFK_GS_long_read_20240301_ANV5_202502201932'), - bqsrc('456a8996', 'CMH_GAFK_IlluminaGSA_20240311_ANV5_202502201937'), - bqsrc('9263e232', 'CMH_GAFK_IsoSeq_20240113_ANV5_202502201941'), - bqsrc('ef718b6b', 'CMH_GAFK_PacBio_methyl_tagged_20240311_ANV5_202502201945'), - bqsrc('de339830', 'CMH_GAFK_WGS_20240113_ANV5_202502201948'), - bqsrc('706dd75a', 'DepMap_HMB_20240827_ANV5_202410240027'), - bqsrc('cab8b4b3', 'DepMap_HMB_R2_20250224_ANV5_202502241800'), - bqsrc('d4765cad', 'AnVIL_ENCORE_RS293_20250304_ANV5_202503042020'), - bqsrc('e944e571', 'GREGoR_R01_GRU_20240208_ANV5_202502202158'), - bqsrc('24806158', 'GREGoR_R01_HMB_20240208_ANV5_202502202202'), - bqsrc('88e6ae93', 'GREGoR_R02_HMB_20241105_ANV5_202502202131'), - bqsrc('46fcac4d', 'MAS_ISO_seq_20240113_ANV5_202409261333'), - bqsrc('2e8d7a0e', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20240206_ANV5_202502202026'), - bqsrc('14a38418', 'NIMH_Broad_WGSPD1_McCarroll_COGS_DS_WGS_20240113_ANV5_202502202034'), - bqsrc('91ecebc5', 'NIMH_Broad_WGSPD_1_McCarroll_Braff_DS_WGS_20240304_ANV5_202502202030'), -])) - -anvil10_sources = mkdict(anvil9_sources, 283, mkdelta([ - bqsrc('6db4e098', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202503171456'), - bqsrc('14967a4d', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202503171543'), - bqsrc('2c000b04', 'FetalGenomics_PrenatalSEQ_20250520_ANV5_202505201718'), - bqsrc('8ae2d6e6', 'ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome_20250514_ANV5_202505191331'), - bqsrc('1841de51', 'ccdg_asc_ndd_daly_talkowski_IBIS_asd_exome_20250514_ANV5_202505191529'), - bqsrc('9a9bd879', 'ccdg_asc_ndd_daly_talkowski_TASC_asd_exome_20250515_ANV5_202505191729'), - bqsrc('bf0b5c71', 'ccdg_asc_ndd_daly_talkowski_ac_boston_asd_exome_20250507_ANV5_202505080038'), - bqsrc('de55c8ca', 'ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome_20250514_ANV5_202505191339'), - bqsrc('05f81c53', 'ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome_20250514_ANV5_202505191347'), - bqsrc('ec751e53', 'ccdg_asc_ndd_daly_talkowski_brusco_asd_exome_20250514_ANV5_202505191354'), - bqsrc('e880eb24', 'ccdg_asc_ndd_daly_talkowski_chung_asd_exome_20250514_ANV5_202505191404'), - bqsrc('14218b2f', 'ccdg_asc_ndd_daly_talkowski_control_NIMH_asd_exome_20250514_ANV5_202505191412'), - bqsrc('44dd3f7f', 'ccdg_asc_ndd_daly_talkowski_domenici_asd_exome_20250514_ANV5_202505191423'), - bqsrc('b497e3bd', 'ccdg_asc_ndd_daly_talkowski_gargus_asd_exome_20250514_ANV5_202505191431'), - bqsrc('23e0cf66', 
'ccdg_asc_ndd_daly_talkowski_goethe_asd_exome_20250514_ANV5_202505191439'), - bqsrc('dae963c2', 'ccdg_asc_ndd_daly_talkowski_gurrieri_asd_exome_20250514_ANV5_202505191448'), - bqsrc('02a4023f', 'ccdg_asc_ndd_daly_talkowski_herman_asd_exome_20250514_ANV5_202505191456'), - bqsrc('562d7351', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_exome_20250514_ANV5_202505191503'), - bqsrc('90758277', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_wgs_20250514_ANV5_202505191512'), - bqsrc('cd6d2aa6', 'ccdg_asc_ndd_daly_talkowski_hultman_asd_exome_20250514_ANV5_202505191519'), - bqsrc('d21464d2', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_exome_20250514_ANV5_202505191537'), - bqsrc('186b5498', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_wgs_20250514_ANV5_202505191546'), - bqsrc('7fc43dc3', 'ccdg_asc_ndd_daly_talkowski_lattig_asd_exome_20250514_ANV5_202505191554'), - bqsrc('e4ab9e05', 'ccdg_asc_ndd_daly_talkowski_mayo_asd_exome_20250515_ANV5_202505191602'), - bqsrc('81d3c9c8', 'ccdg_asc_ndd_daly_talkowski_mcpartland_asd_exome_20250515_ANV5_202505191610'), - bqsrc('0b020eb2', 'ccdg_asc_ndd_daly_talkowski_menashe_asd_exome_20250515_ANV5_202505191620'), - bqsrc('f49b0d4c', 'ccdg_asc_ndd_daly_talkowski_minshew_asd_exome_20250515_ANV5_202505191632'), - bqsrc('d0e9181a', 'ccdg_asc_ndd_daly_talkowski_parellada_asd_exome_20250515_ANV5_202505191640'), - bqsrc('3965a084', 'ccdg_asc_ndd_daly_talkowski_passos_bueno_asd_exome_20250515_ANV5_202505191651'), - bqsrc('c9d758bb', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_exome__20250515_ANV5_202505191658'), - bqsrc('45f811c7', 'ccdg_asc_ndd_daly_talkowski_persico_asd_exome_20250515_ANV5_202505191706'), - bqsrc('c3a807a3', 'ccdg_asc_ndd_daly_talkowski_renieri_asd_exome_20250515_ANV5_202505191714'), - bqsrc('d610c1c1', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_exome_20250515_ANV5_202505191722'), - bqsrc('ea8f0099', 'ccdg_asc_ndd_daly_talkowski_weiss_asd_exome_20250515_ANV5_202505191738'), - bqsrc('f143f633', 'ccdg_broad_ai_ibd_daly_mcgovern_share_wes_20240104_ANV5_202503171541'), -])) - -anvil11_sources = mkdict(anvil10_sources, 371, mkdelta([ - bqsrc('afe52c93', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSA_MD_20250718_ANV5_202508070436'), - bqsrc('a0e71864', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20250718_ANV5_202507300051'), - bqsrc('d4e6fade', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_GSA_MD_20250718_ANV5_202508051209'), - bqsrc('3c8c822c', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20250718_ANV5_202507300117'), - bqsrc('1c40baac', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_GSA_MD_20250718_ANV5_202508050140'), - bqsrc('6de194a7', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20250718_ANV5_202507300109'), - bqsrc('76e40236', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_GSA_MD_20250718_ANV5_202508050131'), - bqsrc('d1f97905', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20250718_ANV5_202507300059'), - bqsrc('6b600036', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20250718_ANV5_202507300125'), - bqsrc('21abf32b', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_GSA_MD_20250718_ANV5_202508051217'), - bqsrc('a9edefce', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_NPU_IRB_GSA_MD_20250718_ANV5_202508051225'), - bqsrc('1a3c4c17', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20250718_ANV5_202507311756'), - bqsrc('f297f460', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20250718_ANV5_202507300140'), - bqsrc('d570baa0', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_MDS_NPU_GSA_MD_20250718_ANV5_202508051233'), - bqsrc('e459e688', 
'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_NPU_ADLT_GSA_MD_20250718_ANV5_202508051240'), - bqsrc('4497c1cf', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20250718_ANV5_202507300148'), - bqsrc('661aa0d4', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_GSA_MD_20250718_ANV5_202508051249'), - bqsrc('1432eb38', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20250718_ANV5_202507300156'), - bqsrc('41cb412f', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_GSA_MD_20250718_ANV5_202508051256'), - bqsrc('c5c63622', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20250718_ANV5_202507311805'), - bqsrc('0374d242', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_MDS_NPU_GSA_MD_20250718_ANV5_202508051303'), - bqsrc('03d1e104', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20250718_ANV5_202507311814'), - bqsrc('2abd204e', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20250718_ANV5_202507300218'), - bqsrc('4427fad3', 'CCDG_Broad_NP_Epilepsy_CANCAL_GSA_MD_20250718_ANV5_202508051311'), - bqsrc('bde9ccdd', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_GSA_MD_20250718_ANV5_202508051319'), - bqsrc('c01aff87', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20250718_ANV5_202507300230'), - bqsrc('32f47a85', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_GSA_MD_20250718_ANV5_202508051327'), - bqsrc('38c9f789', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_WES_20250718_ANV5_202507300237'), - bqsrc('11d221ae', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_GSA_MD_20250718_ANV5_202508051334'), - bqsrc('1e8df3c0', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_WES_20250718_ANV5_202507300245'), - bqsrc('605ffd28', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_GSA_MD_20250721_ANV5_202508051342'), - bqsrc('69b2412d', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20250721_ANV5_202507300252'), - bqsrc('c209fbb5', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_GSA_MD_20250721_ANV5_202508051355'), - bqsrc('521fffef', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_WES_20250721_ANV5_202507300259'), - bqsrc('89f25203', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_GSA_MD_20250721_ANV5_202508051403'), - bqsrc('3efe17c5', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20250721_ANV5_202507300307'), - bqsrc('1a1d321e', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051412'), - bqsrc('50e7f491', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_WES_20250721_ANV5_202507311823'), - bqsrc('d6d892c9', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_GSA_MD_20250721_ANV5_202508051421'), - bqsrc('8ea29289', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_WES_20250721_ANV5_202507300323'), - bqsrc('51b097e1', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_GSA_MD_20250721_ANV5_202508051429'), - bqsrc('9b1b6c81', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20250721_ANV5_202507300331'), - bqsrc('bfd88a7f', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051437'), - bqsrc('eadf2c20', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_WES_20250721_ANV5_202507311835'), - bqsrc('fdd88715', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20250721_ANV5_202507300347'), - bqsrc('c040bb51', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_MDS_GSA_MD_20250721_ANV5_202508051446'), - bqsrc('d8d5745a', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051454'), - bqsrc('4bb9bad6', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_WES_20250721_ANV5_202507300355'), - bqsrc('7205017c', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_GSA_MD_20250721_ANV5_202508051503'), - bqsrc('a1074acf', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_WES_20250721_ANV5_202507311844'), - bqsrc('63a1cdbc', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_GSA_MD_20250721_ANV5_202508051511'), - bqsrc('67ab4fc2', 
'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20250721_ANV5_202507300410'), - bqsrc('59478363', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_GSA_MD_20250721_ANV5_202508051519'), - bqsrc('28c93500', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20250721_ANV5_202507300422'), - bqsrc('508e8da9', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_GSA_MD_20250721_ANV5_202508051527'), - bqsrc('dcf91ba6', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20250721_ANV5_202507300429'), - bqsrc('3adff934', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_GSA_MD_20250721_ANV5_202508051535'), - bqsrc('c2dc4f2d', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20250721_ANV5_202507300438'), - bqsrc('a8eb2929', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_GSA_MD_20250721_ANV5_202508051543'), - bqsrc('0da705f6', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_WES_20250721_ANV5_202507300445'), - bqsrc('7e5aa87b', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_GSA_MD_20250721_ANV5_202508051551'), - bqsrc('df3c4c25', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_WES_20250721_ANV5_202507311853'), - bqsrc('be423fad', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_GSA_MD_20250721_ANV5_202508051559'), - bqsrc('76b648a7', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_WES_20250721_ANV5_202507300459'), - bqsrc('2e47cb4a', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_GSA_MD_20250721_ANV5_202508051608'), - bqsrc('b92ea668', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_WES_20250721_ANV5_202507300507'), - bqsrc('fb086869', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_GSA_MD_20250721_ANV5_202508051615'), - bqsrc('ef987506', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_WES_20250721_ANV5_202507300514'), - bqsrc('f695ec55', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051624'), - bqsrc('cbf89a9e', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_WES_20250721_ANV5_202507300522'), - bqsrc('3383c514', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_GSA_MD_20250721_ANV5_202508051633'), - bqsrc('1e2905c4', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20250721_ANV5_202507311902'), - bqsrc('a49a2f24', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_GSA_MD_20250721_ANV5_202508051641'), - bqsrc('cf8affe8', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20250721_ANV5_202507311912'), - bqsrc('2c90922a', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_GSA_MD_20250721_ANV5_202508051651'), - bqsrc('a8ea4518', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20250721_ANV5_202507300545'), - bqsrc('e9212e1f', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_GSA_MD_20250721_ANV5_202508051658'), - bqsrc('14cc9aca', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_WES_20250721_ANV5_202507300553'), - bqsrc('2c6d0fc7', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_GSA_MD_20250721_ANV5_202508051707'), - bqsrc('5ec41204', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20250721_ANV5_202507311923'), - bqsrc('42043ca7', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_GSA_MD_20250721_ANV5_202508051715'), - bqsrc('8696ea48', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20250721_ANV5_202507300608'), - bqsrc('c1c7f997', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_GSA_MD_20250721_ANV5_202508051724'), - bqsrc('55db8a23', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_WES_20250721_ANV5_202507311932'), - bqsrc('6292d318', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_GSA_MD_20250721_ANV5_202508051734'), - bqsrc('480ae148', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_WES_20250721_ANV5_202507311941'), - bqsrc('3ef65a17', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_GSA_MD_20250721_ANV5_202508051741'), - bqsrc('9d825c25', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_WES_20250721_ANV5_202507311950'), - bqsrc('c2551282', 
'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051749'), - bqsrc('b623f957', 'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_WES_20250721_ANV5_202507300637'), - bqsrc('d2fe229f', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_GSA_MD_20250721_ANV5_202508051757'), - bqsrc('eb2ef800', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20250721_ANV5_202507300644'), - bqsrc('e78c32ac', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_GSA_MD_20250721_ANV5_202508051804'), - bqsrc('b6e37b1f', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20250721_ANV5_202507300652'), - bqsrc('a5c4baea', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_GSA_MD_20250721_ANV5_202508051813'), - bqsrc('fc9369c5', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20250721_ANV5_202507300659'), - bqsrc('2e229142', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_GSA_MD_20250721_ANV5_202508051820'), - bqsrc('be41731b', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20250721_ANV5_202507300707'), - bqsrc('978ba5d4', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_GSA_MD_20250721_ANV5_202508051912'), - bqsrc('75f1ba4f', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20250721_ANV5_202507300719'), - bqsrc('1102e29c', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_GSA_MD_20250721_ANV5_202508051920'), - bqsrc('906814a8', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_WES_20250721_ANV5_202507300726'), - bqsrc('d0448b56', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_GSA_MD_20250721_ANV5_202508051928'), - bqsrc('d76a4f76', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_WES_20250721_ANV5_202507311958'), - bqsrc('2950a024', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_GSA_MD_20250721_ANV5_202508051936'), - bqsrc('4cc169a9', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_WES_20250721_ANV5_202507300740'), - bqsrc('3a75c858', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_GSA_MD_20250721_ANV5_202508051945'), - bqsrc('6e53560b', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20250721_ANV5_202507312007'), - bqsrc('32238959', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_GSA_MD_20250721_ANV5_202508051955'), - bqsrc('a0516528', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20250721_ANV5_202507300756'), - bqsrc('8a490e9e', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_GSA_MD_20250721_ANV5_202508052004'), - bqsrc('a894d673', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20250721_ANV5_202507300804'), - bqsrc('8091a8f2', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_GSA_MD_20250721_ANV5_202508052013'), - bqsrc('0ba57db1', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20250721_ANV5_202507300810'), - bqsrc('e1a82cc1', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_GSA_MD_20250721_ANV5_202508052020'), - bqsrc('98461d37', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20250721_ANV5_202507300817'), - bqsrc('1490968b', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_GSA_MD_20250721_ANV5_202508052028'), - bqsrc('96cd36de', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_WES_20250721_ANV5_202507300824'), - bqsrc('189ec05e', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_GSA_MD_20250721_ANV5_202508052035'), - bqsrc('32793aae', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20250721_ANV5_202507300831'), - bqsrc('f344bff6', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_MDS_GSA_MD_20250721_ANV5_202508052042'), - bqsrc('21a15106', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20250721_ANV5_202507300838'), - bqsrc('1d2b5391', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_GSA_MD_20250721_ANV5_202508052050'), - bqsrc('d3c243dc', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_WES_20250721_ANV5_202507300845'), - bqsrc('65dc7d73', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_GSA_MD_20250721_ANV5_202508052101'), - 
bqsrc('38abec69', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_WES_20250721_ANV5_202507300852'), - bqsrc('aa634284', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_GSA_MD_20250730_ANV5_202508061225'), - bqsrc('f715714d', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_GSA_MD_20250721_ANV5_202508052109'), - bqsrc('0fef9408', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_WES_20250721_ANV5_202507300900'), - bqsrc('d2819933', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_WES_20250721_ANV5_202507300906'), - bqsrc('749bc2ed', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508052116'), - bqsrc('8951be0e', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20250721_ANV5_202507300913'), - bqsrc('2d5171ac', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_GSA_MD_20250721_ANV5_202508052124'), - bqsrc('e358235d', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_WES_20250721_ANV5_202507300921'), - bqsrc('9277558d', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_GSA_MD_20250721_ANV5_202508052131'), - bqsrc('e4cb6e34', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20250721_ANV5_202507300929'), - bqsrc('b1bb610a', 'CCDG_Broad_NP_Epilepsy_USAUPN_CHOP_GRU_GSA_MD_20250721_ANV5_202508052139'), - bqsrc('9e68cf90', 'CCDG_Broad_NP_Epilepsy_USAUPN_CHOP_GRU_NPU_GSA_MD_20250721_ANV5_202508052147'), - bqsrc('cb408fa3', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_NPU_WES_20250721_ANV5_202507300937'), - bqsrc('809be4ae', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20250721_ANV5_202507300944'), - bqsrc('3cce2504', 'CCDG_Broad_NP_Epilepsy_USAUPN_Penn_GRU_GSA_MD_20250721_ANV5_202508052154'), - bqsrc('3ca6cd04', 'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20250721_ANV5_202507300951'), - bqsrc('b2659034', 'CCDG_Broad_NP_Epilepsy_USAVANControls_HMB_GSO_GSA_MD_20250721_ANV5_202508052211'), - bqsrc('cf9e28f1', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_GSA_MD_20250721_ANV5_202508052202'), - bqsrc('11a5f960', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_WES_20250721_ANV5_202507300959'), - # Supersedes snapshot ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes… popped below - bqsrc('ce5b4d0e', 'CCDG_Broad_NP_Epilepsy_USAVANcontrols_HMB_GSO_WES_20250721_ANV5_202507301007'), - bqsrc('84b1d212', 'CCDG_Broad_NP_Epilepsy_ZAFAGN_DS_EPI_COMO_MDS_GSA_MD_20250721_ANV5_202508052220'), - # Supersedes snapshot ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes… popped below - bqsrc('4b531498', 'CCDG_Broad_NP_Epilepsy_ZAFAGN_DS_EPI_COMO_MDS_WES_20250721_ANV5_202507301017'), - bqsrc('bdc5f5a9', 'CCDG_Broad_Spalletta_HMB_NPU_MDS_WES_20250721_ANV5_202507301024'), - bqsrc('2b98851b', 'CMG_Yale_GRU_20221020_ANV5_202507091800'), - bqsrc('83ab11a0', 'CMH_GAFK_R5_20250801_ANV5_202508011248'), - bqsrc('e5c7dfdd', 'ENCORE_293T_20250710_ANV5_202507211509'), - bqsrc('43e71067', 'ENCORE_RS293_20250710_ANV5_202507211507'), - bqsrc('262cd5df', 'GREGOR_R03_GRU_20250612_ANV5_202506271443'), - bqsrc('a50f51b8', 'GREGOR_R03_HMB_20250612_ANV5_202506271503'), - bqsrc('fe5fb412', 'GTEx_v10_hg38_20241105_ANV5_202506201300'), - bqsrc('e6508a35', 'HudsonAlpha_LR_v1_GRU_20241018_ANV5_202507091815'), - bqsrc('4ed55ce5', 'IGVF_HMB_MDS_R1_20250801_ANV5_202508011316'), - bqsrc('34477ca5', 'MAGE_20250710_ANV5_202507211510'), - bqsrc('027605b3', 'NIA_CARD_LR_WGS_HBCC_20250731_ANV5_202508011421'), - bqsrc('31f7c3dd', 'NIA_CARD_LR_WGS_NABEC_GRU_V2_20250731_ANV5_202508011423'), - bqsrc('04b6f4d8', 'PAGE_BioMe_GRU_WGS_20250224_ANV5_202502241731'), - bqsrc('af4c978f', 'PAGE_MEC_GRU_WGS_20250224_ANV5_202502241739'), - bqsrc('71b74bcf', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20250224_ANV5_202502241745'), - 
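The two `# Supersedes …` comments above refer to the `pop` mechanism: a replacement snapshot is added under its own catalog key, while a `pop` entry (the two entries that close this delta, just below) re-maps the old key to `None` so the old source drops out of the active list. A self-contained sketch of that mechanism, mirroring the helper definitions that appear in the hammerbox copy of this file later in this diff; the project IDs and snapshot names here are made up::

    pop = 1  # flag: remove the snapshot from the catalog

    def mksrc(google_project, snapshot, flags=0):
        # The catalog key is the snapshot name minus the leading 'ANVIL' and
        # the three trailing date/version fields; a pop entry maps that key
        # to None.
        project = '_'.join(snapshot.split('_')[1:-3])
        source = None if flags & pop else ':'.join(
            ['tdr', 'bigquery', 'gcp', 'datarepo-' + google_project, snapshot, ''])
        return project, source

    old = dict([mksrc('deadbeef', 'ANVIL_Foo_Study_20221101_ANV5_202201010101')])
    new = old | dict([
        # Supersede: pop the old snapshot, add its replacement under a new key
        mksrc('deadbeef', 'ANVIL_Foo_Study_20221101_ANV5_202201010101', pop),
        mksrc('cafebabe', 'ANVIL_Foo_Study_V2_20250101_ANV5_202501010101'),
    ])
    active = list(filter(None, new.values()))  # mklist: None entries drop out
    assert len(active) == 1 and 'cafebabe' in active[0]

In the actual delta above, the superseded snapshot names differ from their replacements only in letter case, so the derived keys differ and an explicit `pop` entry is required; a plain dict overwrite would not remove the old source.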
bqsrc('ff012258', 'ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes_20221101_ANV5_202409302105', pop), - bqsrc('61b6b42b', 'ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes_20221026_ANV5_202409302116', pop), -])) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - # Set variables for the `anvilprod` (short for AnVIL production) - # deployment here. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. Settings that are applicable to all environments but - # specific to you go into `environment.local.py` at the project root. - - 'AZUL_DEPLOYMENT_STAGE': 'anvilprod', - - 'AZUL_DOMAIN_NAME': 'explore.anvilproject.org', - - 'AZUL_CATALOGS': base64.b64encode(bz2.compress(json.dumps({ - f'{catalog}{suffix}': dict(atlas=atlas, - internal=internal, - plugins=dict(metadata=dict(name='anvil'), - repository=dict(name='tdr_anvil')), - sources=list(filter(None, sources.values()))) - for atlas, catalog, sources in [ - ('anvil', 'anvil9', anvil9_sources), - ('anvil', 'anvil11', anvil11_sources), - ] - for suffix, internal in [ - ('', False), - ('-it', True) - ] - }).encode())).decode('ascii'), - - 'AZUL_TDR_SOURCE_LOCATION': 'us-central1', - 'AZUL_TDR_SERVICE_URL': 'https://data.terra.bio', - 'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-prod.broadinstitute.org', - 'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-prod.broadinstitute.org', - 'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-prod.broadinstitute.org', - - 'AZUL_ENABLE_MONITORING': '1', - - 'AZUL_ES_INSTANCE_TYPE': 'r6gd.xlarge.search', - 'AZUL_ES_INSTANCE_COUNT': '6', - - 'AZUL_CONTRIBUTION_CONCURRENCY': '300/64', - - 'AZUL_DEBUG': '1', - - 'AZUL_BILLING': 'anvil', - - 'AZUL_OWNER': 'hannes@ucsc.edu', - - 'AZUL_MONITORING_EMAIL': 'azul-group@ucsc.edu', - - 'AZUL_AWS_ACCOUNT_ID': '465330168186', - 'AWS_DEFAULT_REGION': 'us-east-1', - - 'GOOGLE_PROJECT': 'platform-anvil-prod', - - 'AZUL_DEPLOYMENT_INCARNATION': '1', - - 'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '1055427471534-8ee4mhig5j40n6n366j7uul26bbbhp2p.apps.googleusercontent.com', - - 'azul_slack_integration': json.dumps({ - 'workspace_id': 'T09P9H91S', # ucsc-gi.slack.com - 'channel_id': 'C04TKUL49FA' # #team-boardwalk-anvilprod - }), - - 'AZUL_ENABLE_VERBATIM_RELATIONS': '0', - } diff --git a/deployments/dev.browser/environment.py b/deployments/dev.browser/environment.py deleted file mode 100644 index 0b28adf523..0000000000 --- a/deployments/dev.browser/environment.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Optional, -) - - -def env() -> Mapping[str, 
Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - 'azul_terraform_component': 'browser', - 'azul_browser_sites': json.dumps({ - 'browser': { - 'zone': '{AZUL_DOMAIN_NAME}', - 'domain': 'explore.{AZUL_DOMAIN_NAME}', - 'project': 'ucsc/data-browser', - 'branch': 'ucsc/hca/dev', - 'tarball_name': 'hca', - 'tarball_path': 'out', - 'real_path': '' - }, - 'lungmap': { - 'zone': 'dev.data-browser.lungmap.net', - 'domain': 'dev.data-browser.lungmap.net', - 'project': 'ucsc/data-browser', - 'branch': 'ucsc/lungmap/dev', - 'tarball_name': 'lungmap', - 'tarball_path': 'out', - 'real_path': '' - } - }) - } diff --git a/deployments/dev.gitlab/environment.py b/deployments/dev.gitlab/environment.py deleted file mode 100644 index 68f0d4c3f1..0000000000 --- a/deployments/dev.gitlab/environment.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections.abc import ( - Mapping, -) -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - 'azul_terraform_component': 'gitlab', - 'azul_vpc_cidr': '172.21.0.0/16', - 'azul_vpn_subnet': '10.42.0.0/16' - } diff --git a/deployments/dev.shared/environment.py b/deployments/dev.shared/environment.py deleted file mode 100644 index 5705e22099..0000000000 --- a/deployments/dev.shared/environment.py +++ /dev/null @@ -1,38 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. 
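The docstring above (repeated in each of these deleted `environment.py` files) specifies the `{FOO}` reference contract but not the resolution step. A minimal sketch of one way to satisfy that contract, assuming plain `str.format_map` substitution over the merged mapping and no cyclic references; this is an illustration of the documented behavior, not Azul's actual resolver::

    def resolve(merged: dict) -> dict:
        # Entries with a None value are excluded, per the docstring
        env = {k: v for k, v in merged.items() if v is not None}
        # Substitute repeatedly until a fixed point is reached (assumes no
        # cycles; literal braces would need escaping under this naive scheme)
        while True:
            resolved = {k: v.format_map(env) for k, v in env.items()}
            if resolved == env:
                return resolved
            env = resolved

    merged = {
        'AZUL_DOMAIN_NAME': 'dev.example.org',           # hypothetical values
        'BROWSER_DOMAIN': 'explore.{AZUL_DOMAIN_NAME}',
        'NO_DEFAULT': None,  # documented without a default value
    }
    assert resolve(merged)['BROWSER_DOMAIN'] == 'explore.dev.example.org'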
- - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - 'azul_terraform_component': 'shared', - 'azul_aws_support_roles': json.dumps(['administrator', 'developer']), - - 'azul_security_contact': json.dumps({ - 'name': 'Hannes Schmidt', - 'title': 'Tech lead', - 'email_address': 'azul-group@ucsc.edu', - 'phone_number': '831-454-8200' - }), - } diff --git a/deployments/dev/.example.environment.local.py b/deployments/dev/.example.environment.local.py deleted file mode 100644 index 40bda46631..0000000000 --- a/deployments/dev/.example.environment.local.py +++ /dev/null @@ -1,34 +0,0 @@ -from collections.abc import ( - Mapping, -) -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - - # In the AWS IAM console, create an access key and list it in a - # dedicated configuration profile section of `~/.aws/config` and/or - # `~/.aws/credentials`. Specify the name of the profile here. 
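Earlier in this diff, the anvilprod file builds `AZUL_CATALOGS` by JSON-encoding the catalog mapping, compressing it with bz2, and base64-encoding the result. A consumer can invert that pipeline step by step; a sketch with a hypothetical payload standing in for the real catalog mapping::

    import base64
    import bz2
    import json

    # Encode the way the anvilprod environment.py above does, then invert it.
    payload = {'anvil9': {'atlas': 'anvil', 'internal': False}}
    encoded = base64.b64encode(
        bz2.compress(json.dumps(payload).encode())).decode('ascii')
    decoded = json.loads(bz2.decompress(base64.b64decode(encoded)))
    assert decoded == payload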
- # - 'AWS_PROFILE': 'yourprofile', - } diff --git a/deployments/hammerbox/.example.environment.local.py b/deployments/hammerbox/.example.environment.local.py deleted file mode 120000 index 31145230db..0000000000 --- a/deployments/hammerbox/.example.environment.local.py +++ /dev/null @@ -1 +0,0 @@ -../sandbox/.example.environment.local.py \ No newline at end of file diff --git a/deployments/hammerbox/environment.py b/deployments/hammerbox/environment.py deleted file mode 100644 index 6696460d64..0000000000 --- a/deployments/hammerbox/environment.py +++ /dev/null @@ -1,1302 +0,0 @@ -import base64 -import bz2 -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Literal, - Optional, -) - -is_sandbox = True - -pop = 1 # remove snapshot - - -def bqsrc(google_project: str, - snapshot: str, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - assert len(google_project) == 8, google_project - project = 'datarepo-' + google_project - # Some snapshots start with AnVIL instead of ANVIL - if not snapshot.upper().startswith('ANVIL_'): - snapshot = 'ANVIL_' + snapshot - return mksrc('bigquery', project, snapshot, flags, prefix) - - -def mksrc(source_type: Literal['bigquery', 'parquet'], - google_project, - snapshot, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - project = '_'.join(snapshot.split('_')[1:-3]) - assert flags <= pop - source = None if flags & pop else ':'.join([ - 'tdr', - source_type, - 'gcp', - google_project, - snapshot, - prefix - ]) - return project, source - - -def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]: - result = dict(items) - assert len(items) == len(result), 'collisions detected' - assert list(result.keys()) == sorted(result.keys()), 'input not sorted' - return result - - -def mklist(catalog: dict[str, str]) -> list[str]: - return list(filter(None, catalog.values())) - - -def mkdict(previous_catalog: dict[str, str], - num_expected: int, - delta: dict[str, str] - ) -> dict[str, str]: - catalog = previous_catalog | delta - num_actual = len(mklist(catalog)) - assert num_expected == num_actual, (num_expected, num_actual) - return catalog - - -anvil_sources = mkdict({}, 11, mkdelta([ - bqsrc('3edb7fb1', '1000G_high_coverage_2019_20230517_ANV5_202305181946'), - bqsrc('db7353fb', 'CMG_UWASH_DS_BAV_IRB_PUB_RD_20230419_ANV5_202304201858'), - bqsrc('3b8ef67a', 'CMG_UWASH_DS_BDIS_20230418_ANV5_202304201958'), - bqsrc('5d27ebfe', 'CMG_UWASH_DS_HFA_20230418_ANV5_202304201932'), - bqsrc('9d1a6e0a', 'CMG_UWASH_DS_NBIA_20230418_ANV5_202304201949'), - bqsrc('3243df15', 'CMG_UWASH_HMB_20230418_ANV5_202304201923'), - bqsrc('50484f86', 'CMG_UWASH_HMB_IRB_20230418_ANV5_202304201915'), - bqsrc('74bd0964', 'CMG_UWash_DS_EP_20230419_ANV5_202304201906'), - bqsrc('e5914f89', 'CMG_UWash_GRU_20230418_ANV5_202304201848'), - bqsrc('97ec5366', 'CMG_UWash_GRU_IRB_20230418_ANV5_202304201940'), - bqsrc('4150bd87', 'GTEx_V8_hg38_20230419_ANV5_202304202007') -])) - -anvil1_sources = mkdict(anvil_sources, 63, mkdelta([ - bqsrc('d53aa186', 'CMG_BROAD_BRAIN_ENGLE_WES_20221102_ANV5_202304241525'), - bqsrc('69b2535a', 'CMG_BROAD_BRAIN_SHERR_WGS_20221102_ANV5_202304241530'), - bqsrc('490be510', 'CMG_BROAD_ORPHAN_SCOTT_WGS_20221102_ANV5_202304241538'), - bqsrc('3b33c41b', 'CMG_Broad_Blood_Gazda_WES_20221117_ANV5_202304241459'), - bqsrc('96df3cea', 'CMG_Broad_Blood_Sankaran_WES_20221117_ANV5_202304241501'), - bqsrc('179ee079', 'CMG_Broad_Blood_Sankaran_WGS_20221117_ANV5_202304241503'), - bqsrc('3dd4d039', 
'CMG_Broad_Brain_Gleeson_WES_20221117_ANV5_202304241517'), - bqsrc('c361373f', 'CMG_Broad_Brain_Muntoni_WES_20221102_ANV5_202304241527'), - bqsrc('12ac342c', 'CMG_Broad_Brain_NeuroDev_WES_20221102_ANV5_202304241529'), - bqsrc('d7bfafc6', 'CMG_Broad_Brain_Thaker_WES_20221102_ANV5_202304241531'), - bqsrc('29812b42', 'CMG_Broad_Eye_Pierce_WES_20221205_ANV5_202304242250'), - bqsrc('48134558', 'CMG_Broad_Eye_Pierce_WGS_20221117_ANV5_202304241507'), - bqsrc('36ebaa12', 'CMG_Broad_Heart_PCGC_Tristani_WGS_20221025_ANV5_202304211840'), - bqsrc('f9826139', 'CMG_Broad_Heart_Seidman_WES_20221117_ANV5_202304241504'), - bqsrc('85952af8', 'CMG_Broad_Kidney_Hildebrandt_WES_20230525_ANV5_202305251733'), - bqsrc('ee4ae9a1', 'CMG_Broad_Kidney_Hildebrandt_WGS_20221025_ANV5_202304211844'), - bqsrc('cf168274', 'CMG_Broad_Kidney_Pollak_WES_20221025_ANV5_202304211846'), - bqsrc('4d47ba2c', 'CMG_Broad_Muscle_Beggs_WGS_20221102_ANV5_202304241533'), - bqsrc('82d1271a', 'CMG_Broad_Muscle_Bonnemann_WES_20221117_ANV5_202304241509'), - bqsrc('6be3fb25', 'CMG_Broad_Muscle_Bonnemann_WGS_20221117_ANV5_202304241510'), - bqsrc('b168eb10', 'CMG_Broad_Muscle_KNC_WES_20221116_ANV5_202304242219'), - bqsrc('372244aa', 'CMG_Broad_Muscle_KNC_WGS_20221117_ANV5_202304242221'), - bqsrc('77a6c0aa', 'CMG_Broad_Muscle_Kang_WGS_20221025_ANV5_202304211849'), - bqsrc('736a5f1f', 'CMG_Broad_Muscle_Laing_WES_20221208_ANV5_202304271308'), - bqsrc('5019143b', 'CMG_Broad_Muscle_Myoseq_WES_20230621_ANV5_202306211852'), - bqsrc('27eb651a', 'CMG_Broad_Muscle_Myoseq_WGS_20221208_ANV5_202304271310'), - bqsrc('c087af7a', 'CMG_Broad_Muscle_OGrady_WES_20221205_ANV5_202304242252'), - bqsrc('db987a2e', 'CMG_Broad_Muscle_Ravenscroft_WES_20221208_ANV5_202304271311'), - bqsrc('05df566c', 'CMG_Broad_Muscle_Topf_WES_20221208_ANV5_202304271313'), - bqsrc('87d91f06', 'CMG_Broad_Orphan_Chung_WES_20221102_ANV5_202304241534'), - bqsrc('25f6b696', 'CMG_Broad_Orphan_Estonia_Ounap_WES_20221117_ANV5_202304241512'), - bqsrc('c3b16b41', 'CMG_Broad_Orphan_Estonia_Ounap_WGS_20221205_ANV5_202304242255'), - bqsrc('e2976b05', 'CMG_Broad_Orphan_Jueppner_WES_20221102_ANV5_202304241535'), - bqsrc('32fe2260', 'CMG_Broad_Orphan_Lerner_Ellis_WES_20221102_ANV5_202304241536'), - bqsrc('6f9e574e', 'CMG_Broad_Orphan_Manton_WES_20221117_ANV5_202304241513'), - bqsrc('53cd689b', 'CMG_Broad_Orphan_Manton_WGS_20221117_ANV5_202304241515'), - bqsrc('e7c5babf', 'CMG_Broad_Orphan_Scott_WES_20221025_ANV5_202304241458'), - bqsrc('051877f4', 'CMG_Broad_Orphan_Sweetser_WES_20221102_ANV5_202304241539'), - bqsrc('555c7706', 'CMG_Broad_Orphan_VCGS_White_WES_20221018_ANV5_202304241522'), - bqsrc('3a8f7952', 'CMG_Broad_Orphan_VCGS_White_WGS_20221117_ANV5_202304241523'), - bqsrc('b699c5e3', 'CMG_Broad_Rare_RGP_WES_20221102_ANV5_202304241540'), - bqsrc('2d5bd095', 'CMG_Broad_Stillbirth_Wilkins_Haug_WES_20221102_ANV5_202304241542'), - bqsrc('f3d0eda6', 'CMG_UWash_GRU_20230418_ANV5_202306211828'), - bqsrc('ab5c3fa5', 'CMG_YALE_DS_RARED_20221020_ANV5_202304211812'), - bqsrc('d51578f4', 'CMG_Yale_GRU_20221020_ANV5_202304211517'), - bqsrc('bcedc554', 'CMG_Yale_HMB_20221020_ANV5_202304211813'), - bqsrc('f485fa3e', 'CMG_Yale_HMB_GSO_20221020_ANV5_202304211519'), - bqsrc('45487b69', 'GTEx_Somatic_WGS_20230331_ANV5_202304211636'), - bqsrc('5ebc368c', 'GTEx_V7_hg19_20221128_ANV5_202304211804'), - bqsrc('864913f2', 'GTEx_V9_hg38_20221128_ANV5_202304211853'), - bqsrc('b093b69d', 'GTEx_public_data_20221115_ANV5_202304211659'), - bqsrc('d948d21a', 'cmg_broad_brain_engle_wgs_20221202_ANV5_202304271345'), - 
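All of the snapshot listings in this file are driven by the helper functions defined just above. A short usage sketch showing what `bqsrc` actually produces and what `mkdict` checks; the 8-character project suffix is made up, while the snapshot name is one that appears in `anvil_sources` above::

    # Toy call against the helpers defined above; 'deadbeef' is a made-up
    # Google project suffix.
    project, source = bqsrc('deadbeef',
                            '1000G_high_coverage_2019_20230517_ANV5_202305181946')
    # bqsrc prepends 'datarepo-' to the project and 'ANVIL_' to the snapshot
    # (which doesn't already start with it), then mksrc joins the TDR
    # coordinates, ending with the empty default prefix:
    assert source == ('tdr:bigquery:gcp:datarepo-deadbeef:'
                      'ANVIL_1000G_high_coverage_2019_20230517_ANV5_202305181946:')
    # The catalog key drops the leading 'ANVIL' and the trailing date/version:
    assert project == '1000G_high_coverage_2019'
    # mkdelta checks for key collisions and sort order; mkdict merges the
    # delta and asserts the expected number of non-None (not popped) sources:
    catalog = mkdict({}, 1, mkdelta([(project, source)]))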
bqsrc('1cb73890', 'cmg_broad_heart_ware_wes_20221215_ANV5_202304242145'), -])) - -anvil2_sources = mkdict(anvil1_sources, 104, mkdelta([ - bqsrc('36124817', 'African_American_Seq_HGV_20230727_ANV5_202308291753'), - bqsrc('d795027d', 'CCDG_Broad_CVD_AF_VAFAR_Arrays_20221020_ANV5_202304211823'), - bqsrc('642829f3', 'CCDG_Broad_CVD_AF_VAFAR_WES_20221024_ANV5_202304211826'), - bqsrc('08216a2c', 'CCDG_Broad_CVD_AFib_Vanderbilt_Ablation_WGS_20221020_ANV5_202304211819'), - bqsrc('74975e89', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20221220_ANV5_202304271548'), - bqsrc('ad61c47e', 'CCDG_NHGRI_Broad_ASD_Daly_phs000298_WES_vcf_20230403_ANV5_202304271610'), - bqsrc('5e719362', 'CCDG_NYGC_AI_Asthma_Gala2_WGS_20230605_ANV5_202306131248'), - bqsrc('2734a0e4', 'CCDG_NYGC_NP_Alz_EFIGA_WGS_20230605_ANV5_202306141705'), - bqsrc('710fc60d', 'CCDG_NYGC_NP_Alz_LOAD_WGS_20230605_ANV5_202306131256'), - bqsrc('9626b3eb', 'CCDG_NYGC_NP_Alz_WHICAP_WGS_20230605_ANV5_202306131303'), - bqsrc('25ec7b57', 'CCDG_WASHU_PAGE_20221220_ANV5_202304271544'), - bqsrc('6d8536f4', 'CMH_GAFK_GS_linked_read_20221107_ANV5_202304211527'), - bqsrc('482ab960', 'CMH_GAFK_GS_long_read_20221109_ANV5_202304211529'), - bqsrc('8745e97d', 'CMH_GAFK_scRNA_20221107_ANV5_202304211533'), - bqsrc('1c89dcac', 'CSER_CHARM_GRU_20221208_ANV5_202304271348'), - bqsrc('12d56848', 'CSER_NCGENES2_GRU_20221208_ANV5_202304271349'), - bqsrc('8a4d67ef', 'CSER_SouthSeq_GRU_20221208_ANV5_202304271351'), - bqsrc('f622180d', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_VillageData_20230109_ANV5_202304242045'), - bqsrc('732d1a55', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20230109_ANV5_202304242048'), - bqsrc('90bab913', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_Finkel_SMA_DS_WGS_20230109_ANV5_202304242043'), - bqsrc('e4eb7641', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_WGS_20221115_ANV5_202304242052'), - bqsrc('f9aef3dc', 'NIMH_Broad_WGSPD1_McCarroll_Escamilla_DS_WGS_20221103_ANV5_202304242049'), - bqsrc('aca6a582', 'NIMH_CIRM_FCDI_ConvergentNeuro_McCarroll_Eggan_GRU_Arrays_20230109_ANV5_202304242046'), - bqsrc('06abb598', 'PAGE_BioMe_GRU_WGS_20221128_ANV5_202304211817'), - bqsrc('7c4410ed', 'PAGE_MEC_GRU_WGS_20230131_ANV5_202304211721'), - bqsrc('84d2e3b1', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20221128_ANV5_202304211827'), - bqsrc('ffbc38fd', 'PAGE_WHI_HMB_IRB_WGS_20221019_ANV5_202304211722'), - bqsrc('b1f3e0d1', 'ccdg_asc_ndd_daly_talkowski_cdcseed_asd_gsa_md_20221024_ANV5_202304211749'), - bqsrc('11330a21', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_gsa_md_20221025_ANV5_202304211759'), - bqsrc('86a1dbf3', 'ccdg_broad_ai_ibd_daly_bernstein_gsa_20221025_ANV5_202304241921'), - bqsrc('833ff0a3', 'eMERGE_GRU_IRB_NPU_eMERGEseq_20230130_ANV5_202304271614'), - bqsrc('baf040af', 'eMERGE_GRU_IRB_PUB_NPU_eMERGEseq_20230130_ANV5_202304271616'), - bqsrc('270b3b62', 'eMERGE_GRU_IRB_eMERGEseq_20230130_ANV5_202304271613'), - bqsrc('c13efbe9', 'eMERGE_GRU_NPU_eMERGEseq_20230130_ANV5_202304271617'), - bqsrc('34f8138d', 'eMERGE_GRU_eMERGEseq_20230130_ANV5_202304271612'), - bqsrc('90b7b6e8', 'eMERGE_HMB_GSO_eMERGEseq_20230130_ANV5_202304271621'), - bqsrc('6e6dca92', 'eMERGE_HMB_IRB_PUB_eMERGEseq_20230130_ANV5_202304271622'), - bqsrc('1ddf2a8e', 'eMERGE_HMB_NPU_eMERGEseq_20230130_ANV5_202304271624'), - bqsrc('dba97a65', 'eMERGE_HMB_eMERGEseq_20230130_ANV5_202304271619'), - bqsrc('51aa9a22', 'eMERGE_PGRNseq_20230118_ANV5_202304241853'), - bqsrc('ce8c469f', 'eMERGE_PRS_Arrays_20221220_ANV5_202304271346') -])) - -anvil3_sources = mkdict(anvil2_sources, 
151, mkdelta([ - bqsrc('9a74aed3', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202310091900'), - bqsrc('0768a322', 'CCDG_Broad_CVD_AF_Ellinor_MGH_Arrays_20221024_ANV5_202304211831'), - bqsrc('2b135baf', 'CCDG_Broad_CVD_AFib_MGH_WGS_20221024_ANV5_202304211829'), - bqsrc('96b594f9', 'CCDG_Broad_CVD_EOCAD_TaiChi_WGS_20221026_ANV5_202310101655'), - bqsrc('318ae48e', 'CCDG_Broad_CVD_Stroke_BRAVE_WGS_20221107_ANV5_202304241543'), - bqsrc('7ea7a6e9', 'CCDG_Broad_MI_BRAVE_GRU_WES_20221107_ANV5_202304241545'), - bqsrc('2339e241', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20221101_ANV5_202304241613'), - bqsrc('cd6cee03', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20221101_ANV5_202304241612'), - bqsrc('da88c3ce', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20221101_ANV5_202304241657'), - bqsrc('2b361bda', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20221026_ANV5_202304241549'), - bqsrc('6eeff3fc', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20221108_ANV5_202304241701'), - bqsrc('21923ed0', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20221027_ANV5_202304241556'), - bqsrc('5b10132b', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20230328_ANV5_202304241552'), - bqsrc('d2d5ba15', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20221108_ANV5_202304241702'), - bqsrc('fc0a35a8', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20221108_ANV5_202304241704'), - bqsrc('f14cd6d7', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20230328_ANV5_202304241659'), - bqsrc('3832cf81', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20221026_ANV5_202304241548'), - bqsrc('098aadb0', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20221026_ANV5_202304241551'), - bqsrc('d9ea4f23', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20221027_ANV5_202304241554'), - bqsrc('0c9ab563', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20221108_ANV5_202304241705'), - bqsrc('a383d752', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20221108_ANV5_202304241707'), - bqsrc('03b52641', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20221027_ANV5_202304241601'), - bqsrc('2e9ab296', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20221108_ANV5_202304241605'), - bqsrc('89162c54', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20221027_ANV5_202304241609'), - bqsrc('fd5cd738', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20230328_ANV5_202304241602'), - bqsrc('d987821a', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20221108_ANV5_202304241709'), - bqsrc('b93e1cfa', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20221027_ANV5_202304241604'), - bqsrc('2e9630dd', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20221027_ANV5_202304241559'), - bqsrc('ee58a7a9', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20230612_ANV5_202306131343'), - bqsrc('ff5356bb', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20221027_ANV5_202304241555'), - bqsrc('2262daa7', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20221027_ANV5_202304241610'), - bqsrc('2a947c33', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20221027_ANV5_202304241558'), - bqsrc('5b3c42e1', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20221110_ANV5_202304241713'), - bqsrc('91b4b33c', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20230328_ANV5_202306211900'), - bqsrc('e4fe111a', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20221108_ANV5_202304241607'), - bqsrc('8b120833', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20230328_ANV5_202304241716'), - bqsrc('f051499d', 'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20230328_ANV5_202304241720'), - bqsrc('fd49a493', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_DS_WGS_20230525_ANV5_202306211841'), - 
bqsrc('076da44b', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202306211847'), - bqsrc('7e03b5fd', 'CMG_Broad_Brain_Walsh_WES_20230605_ANV5_202310101734'), - bqsrc('c43e7400', 'CMG_Broad_Muscle_Kang_WES_20230525_ANV5_202310101649'), - bqsrc('14f5afa3', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_10XLRGenomes_20221115_ANV5_202310101713'), - bqsrc('94091a22', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_10XLRGenomes_20230331_ANV5_202310101715'), - bqsrc('55b75002', 'PAGE_SoL_HMB_WGS_20221220_ANV5_202310061302'), - bqsrc('02ad84ea', 'T2T_20230714_ANV5_202310101616'), - bqsrc('08cd15a2', 'ccdg_washu_ai_t1d_t1dgc_wgs_20221031_ANV5_202304211552'), - bqsrc('e3065356', 'ccdg_washu_cvd_eocad_biome_wgs_20221024_ANV5_202304211601'), -])) - -anvil4_sources = mkdict(anvil3_sources, 200, mkdelta([ - bqsrc('1a86e7ca', 'CCDG_Baylor_CVD_AFib_Groningen_WGS_20221122_ANV5_202304242224'), - bqsrc('92716a90', 'CCDG_Baylor_CVD_AFib_VAFAR_HMB_IRB_WGS_20221020_ANV5_202304211525'), - bqsrc('77445496', 'CCDG_Baylor_CVD_EOCAD_BioMe_WGS_20221122_ANV5_202304242226'), - bqsrc('1b0d6b90', 'CCDG_Baylor_CVD_HHRC_Brownsville_GRU_WGS_20221122_ANV5_202304242228'), - bqsrc('373b7918', 'CCDG_Baylor_CVD_HemStroke_BNI_HMB_WGS_20221215_ANV5_202304242306'), - bqsrc('efc3e806', 'CCDG_Baylor_CVD_HemStroke_Duke_DS_WGS_20221117_ANV5_202304242122'), - bqsrc('1044f96d', 'CCDG_Baylor_CVD_HemStroke_ERICH_WGS_20221207_ANV5_202304271256'), - bqsrc('f23a6ec8', 'CCDG_Baylor_CVD_HemStroke_GERFHS_HMB_WGS_20221215_ANV5_202304242307'), - bqsrc('de34ca6e', 'CCDG_Baylor_CVD_HemStroke_Regards_DS_WGS_20221117_ANV5_202304242123'), - bqsrc('d9c6f406', 'CCDG_Baylor_CVD_HemStroke_Yale_HMB_WGS_20221215_ANV5_202304242309'), - bqsrc('56883e56', 'CCDG_Baylor_CVD_Oregon_SUDS_GRU_WGS_20221215_ANV5_202304242302'), - bqsrc('7f3ba7ec', 'CCDG_Baylor_CVD_TexGen_DS_WGS_20221117_ANV5_202304242125'), - bqsrc('da965e26', 'CCDG_Baylor_CVD_Ventura_Presto_GRU_IRB_WGS_20221117_ANV5_202304242127'), - bqsrc('906bf803', 'CCDG_Broad_AI_IBD_Brant_DS_IBD_WGS_20221110_ANV5_202304241911'), - bqsrc('343ca1c3', 'CCDG_Broad_AI_IBD_Brant_HMB_WGS_20221110_ANV5_202304241912'), - bqsrc('80a63603', 'CCDG_Broad_AI_IBD_Cho_WGS_20230313_ANV5_202304241903'), - bqsrc('a98e7a43', 'CCDG_Broad_AI_IBD_Kugathasan_WGS_20221110_ANV5_202304241906'), - bqsrc('381bc957', 'CCDG_Broad_AI_IBD_McCauley_WGS_20221110_ANV5_202304241914'), - bqsrc('6a10165d', 'CCDG_Broad_AI_IBD_McGovern_WGS_20221110_ANV5_202304241907'), - bqsrc('a2743c82', 'CCDG_Broad_AI_IBD_Newberry_WGS_20221025_ANV5_202304241901'), - bqsrc('ed109b2f', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_Arrays_20230612_ANV5_202306131350'), - bqsrc('3d8b62d7', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_WES_20221025_ANV5_202304241856'), - bqsrc('450ba911', 'CCDG_Broad_CVD_AF_ENGAGE_DS_WES_20230418_ANV5_202304210808'), - bqsrc('dfabf632', 'CCDG_Broad_CVD_AF_Ellinor_MGH_WES_20221117_ANV5_202304271354'), - bqsrc('485eb707', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_Arrays_20230128_ANV5_202304271554'), - bqsrc('58dffe5a', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_Arrays_20221103_ANV5_202304242105'), - bqsrc('cf7f2c0c', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_WES_20221103_ANV5_202304242107'), - bqsrc('f896734e', 'CCDG_Broad_CVD_AF_Marcus_UCSF_Arrays_20221102_ANV5_202304242039'), - bqsrc('40c2f4f4', 'CCDG_Broad_CVD_AF_Marcus_UCSF_WES_20221222_ANV5_202304242040'), - bqsrc('67117555', 'CCDG_Broad_CVD_AF_Rienstra_WES_20221222_ANV5_202304242035'), - bqsrc('c45dd622', 'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_Arrays_20221103_ANV5_202304242110'), - bqsrc('b12d2e52', 
'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_WES_20230118_ANV5_202304242112'), - bqsrc('43f6230a', 'CCDG_Broad_CVD_AFib_AFLMU_WGS_20231008_ANV5_202310091911'), - bqsrc('de64d25a', 'CCDG_Broad_CVD_AFib_UCSF_WGS_20221222_ANV5_202304242037'), - bqsrc('e25350dd', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_Arrays_20230517_ANV5_202310101704'), - bqsrc('9921a6fa', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_WES_20230621_ANV5_202306211933'), - bqsrc('383d9d9b', 'CCDG_Broad_CVD_PROMIS_GRU_WES_20230418_ANV5_202306211912'), - bqsrc('5df71da4', 'CCDG_Broad_MI_InStem_WES_20221122_ANV5_202304242236'), - bqsrc('1793828c', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSRS_WES_20230324_ANV5_202304241752'), - bqsrc('d44547dc', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202304271556'), - bqsrc('70c803d7', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_GSA_MD_20221117_ANV5_202304271400'), - bqsrc('f5a4a895', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_GSA_MD_20221117_ANV5_202304271358'), - bqsrc('b8b8ba44', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_GSA_MD_20221117_ANV5_202304271356'), - bqsrc('0b0ca621', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202306211945'), - bqsrc('f85048a3', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_GSA_MD_20221117_ANV5_202304271401'), - bqsrc('68037179', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202304271559'), - bqsrc('025215fc', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202304271601'), - bqsrc('92905a2b', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_GSA_MD_20221117_ANV5_202304271403'), - bqsrc('3f3ad5c7', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_GSA_MD_20230118_ANV5_202304271404') -])) - -anvil5_sources = mkdict(anvil4_sources, 261, mkdelta([ - bqsrc('3c30a9a2', '1000G_high_coverage_2019_20230517_ANV5_202403030329'), - bqsrc('adf70694', 'ALS_FTD_ALS_AssociatedGenes_GRU_v1_20231221_ANV5_202401112025'), - bqsrc('815ad21b', 'ALS_FTD_DEMENTIA_SEQ_GRU_v1_20231221_ANV5_202401112033'), - bqsrc('ab46a8e4', 'CCDG_NYGC_NP_Autism_ACE2_DS_MDS_WGS_20230605_ANV5_202403032021'), - bqsrc('df058a48', 'CCDG_NYGC_NP_Autism_AGRE_WGS_20230605_ANV5_202403032044'), - bqsrc('61910b61', 'CCDG_NYGC_NP_Autism_CAG_DS_WGS_20230605_ANV5_202403032053'), - bqsrc('8d6472a1', 'CCDG_NYGC_NP_Autism_HFA_DS_WGS_20230605_ANV5_202403032108'), - bqsrc('f0a12498', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_DS_WGS_20221103_ANV5_202403032124'), - bqsrc('f06dc5dd', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_GRU_WGS_20221103_ANV5_202403032131'), - bqsrc('b791f5c1', 'CCDG_NYGC_NP_Autism_SAGE_WGS_20230605_ANV5_202403032137'), - bqsrc('b9222139', 'CMG_BROAD_BRAIN_ENGLE_WES_20240205_ANV5_202402051624'), - bqsrc('7e094253', 'CMG_BROAD_BRAIN_SHERR_WGS_20221102_ANV5_202402281543'), - bqsrc('c797490f', 'CMG_BROAD_ORPHAN_SCOTT_WGS_20221102_ANV5_202402281552'), - bqsrc('0a1360b1', 'CMG_Broad_Blood_Gazda_WES_20221117_ANV5_202402290547'), - bqsrc('faa71b49', 'CMG_Broad_Blood_Sankaran_WES_20221117_ANV5_202402290555'), - bqsrc('abce6387', 'CMG_Broad_Blood_Sankaran_WGS_20221117_ANV5_202402290606'), - bqsrc('4153ad1f', 'CMG_Broad_Muscle_Laing_WES_20221208_ANV5_202402291926'), - bqsrc('5bbb5a28', 'CMG_Broad_Orphan_Jueppner_WES_20240205_ANV5_202402051640'), - bqsrc('18bd3df4', 'CMG_UWASH_HMB_20230418_ANV5_202402070029'), - bqsrc('6f4155f2', 'CMG_UWash_GRU_20240301_ANV5_202403040330'), - bqsrc('6486ae96', 'CMG_UWash_GRU_1_20240113_ANV5_202401141440'), - bqsrc('0fad0f77', 'CMG_YALE_DS_RARED_20221020_ANV5_202402281620'), - bqsrc('ad307392', 'CMG_Yale_GRU_20221020_ANV5_202402281628'), - bqsrc('fecab5bc', 
'CMG_Yale_HMB_20221020_ANV5_202402290926'), - bqsrc('f9699204', 'CMG_Yale_HMB_GSO_20221020_ANV5_202402290935'), - bqsrc('c5bd892a', 'CMH_GAFK_GS_linked_read_20221107_ANV5_202402290945'), - bqsrc('5e64223a', 'CMH_GAFK_GS_long_read_20240301_ANV5_202403040349'), - bqsrc('ba97c05c', 'CMH_GAFK_scRNA_20221107_ANV5_202402291004'), - bqsrc('2659c380', 'CSER_CHARM_GRU_20240301_ANV5_202403040357'), - bqsrc('0f2e95ad', 'CSER_KidsCanSeq_GRU_20221208_ANV5_202402292138'), - bqsrc('62a0bd6d', 'CSER_NCGENES2_GRU_20221208_ANV5_202402292147'), - bqsrc('df02801a', 'CSER_NYCKIDSEQ_GRU_20240113_ANV5_202401141520'), - bqsrc('4b9c138d', 'CSER_NYCKIDSEQ_HMB_20240113_ANV5_202401141527'), - bqsrc('f4d60c69', 'CSER_P3EGS_GRU_20230727_ANV5_202402070059'), - bqsrc('fc5ed559', 'CSER_SouthSeq_GRU_20221208_ANV5_202402292154'), - bqsrc('74121c99', 'GTEx_BCM_GRU_CoRSIVs_20240116_ANV5_202401170141'), - bqsrc('1a706b0c', 'GTEx_Somatic_WGS_20240116_ANV5_202401170147'), - bqsrc('e063cf6d', 'GTEx_V7_hg19_20221128_ANV5_202402291034'), - bqsrc('383c097a', 'GTEx_V8_hg38_20240116_ANV5_202401170154'), - bqsrc('701eea84', 'GTEx_V9_hg38_20221128_ANV5_202402070108'), - bqsrc('ff9d78a5', 'GTEx_public_data_20240117_ANV5_202401180400'), - bqsrc('37c3d458', 'NIA_CARD_Coriell_Cell_Lines_Open_20230727_ANV5_202401111624'), - bqsrc('06c78117', 'NIA_CARD_LR_WGS_NABEC_GRU_20230727_ANV5_202401111634'), - bqsrc('e4eb7641', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_WGS_20221115_ANV5_202304242052', pop), - bqsrc('a3880121', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_WGS_20240112_ANV5_202402062129'), - bqsrc('25790186', 'PAGE_BioMe_GRU_WGS_20221128_ANV5_202403040429'), - bqsrc('b371989b', 'PAGE_MEC_GRU_WGS_20230131_ANV5_202403040437'), - bqsrc('4a4eec27', 'PAGE_SoL_HMB_WGS_20221220_ANV5_202403040445'), - bqsrc('a1f917db', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20221128_ANV5_202403040453'), - bqsrc('6264931f', 'PAGE_WHI_HMB_IRB_WGS_20221019_ANV5_202403040500'), - bqsrc('8d62ec8f', 'T2T_20230714_ANV5_202312122150'), - bqsrc('bfabc906', 'ccdg_asc_ndd_daly_talkowski_ac_boston_asd_exome_20221117_ANV5_202403040552'), - bqsrc('825399a4', 'ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome_20221108_ANV5_202403040608'), - bqsrc('e3b070a7', 'ccdg_asc_ndd_daly_talkowski_brusco_asd_exome_20230327_ANV5_202403040615'), - bqsrc('2354d65a', 'ccdg_asc_ndd_daly_talkowski_cdcseed_asd_gsa_md_20221024_ANV5_202402291144'), - bqsrc('0ad3f21a', 'ccdg_asc_ndd_daly_talkowski_chung_asd_exome_20221107_ANV5_202403040623'), - bqsrc('c148a340', 'ccdg_asc_ndd_daly_talkowski_control_NIMH_asd_exome_20221201_ANV5_202403040630'), - bqsrc('bc613fa9', 'ccdg_asc_ndd_daly_talkowski_domenici_asd_exome_20221117_ANV5_202403040637'), - bqsrc('97e22445', 'ccdg_asc_ndd_daly_talkowski_goethe_asd_exome_20221107_ANV5_202403040652'), - bqsrc('72efc816', 'ccdg_asc_ndd_daly_talkowski_herman_asd_exome_20221117_ANV5_202403040701'), - bqsrc('e25caee8', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_exome_20221107_ANV5_202403040708'), - bqsrc('22af2470', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_wgs_20221107_ANV5_202403040716'), - bqsrc('a81009d9', 'ccdg_asc_ndd_daly_talkowski_hultman_asd_exome_20231013_ANV5_202403040723'), - bqsrc('bc078d98', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_exome_20221108_ANV5_202403040731'), - bqsrc('0949186c', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_wgs_20221109_ANV5_202403040739'), - bqsrc('4dc4f939', 'ccdg_asc_ndd_daly_talkowski_lattig_asd_exome_20221122_ANV5_202403040746'), - bqsrc('5ed988f8', 'ccdg_asc_ndd_daly_talkowski_menashe_asd_exome_20221108_ANV5_202403040800'), 
-    bqsrc('c6a938e4', 'ccdg_asc_ndd_daly_talkowski_minshew_asd_exome_20221117_ANV5_202403040807'),
-    bqsrc('a245d786', 'ccdg_asc_ndd_daly_talkowski_palotie_asd_exome_20221019_ANV5_202403040815'),
-    bqsrc('7ddd7425', 'ccdg_asc_ndd_daly_talkowski_parellada_asd_exome_20221108_ANV5_202403040822'),
-    bqsrc('aa9f0b28', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_wgs_20221027_ANV5_202403040846'),
-    bqsrc('0b4c3cfb', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_gsa_md_20221025_ANV5_202402291202'),
-    bqsrc('8023858b', 'ccdg_asc_ndd_daly_talkowski_weiss_asd_exome_20221108_ANV5_202403040925'),
-    bqsrc('381b5d80', 'ccdg_broad_ai_ibd_alm_gmc_wes_20230328_ANV5_202403040932'),
-    bqsrc('714d60b9', 'ccdg_broad_ai_ibd_daly_alm_gmc_gsa_20221025_ANV5_202402291210'),
-    bqsrc('86a1dbf3', 'ccdg_broad_ai_ibd_daly_bernstein_gsa_20221025_ANV5_202304241921', pop),
-    bqsrc('dc7a9acd', 'ccdg_broad_ai_ibd_daly_brant_niddk_gsa_20240103_ANV5_202401112147'),
-    bqsrc('916fc0b6', 'ccdg_broad_ai_ibd_daly_duerr_niddk_gsa_20240113_ANV5_202402062134'),
-    bqsrc('48d85607', 'ccdg_broad_ai_ibd_daly_hyams_protect_wes_20240104_ANV5_202403041011'),
-    bqsrc('21d3c731', 'ccdg_broad_ai_ibd_daly_kupcinskas_wes_20240104_ANV5_202403041018'),
-    bqsrc('614a8519', 'ccdg_broad_ai_ibd_daly_lewis_ccfa_wes_20240113_ANV5_202403041026'),
-    bqsrc('6799d240', 'ccdg_broad_ai_ibd_daly_lewis_sparc_gsa_20240104_ANV5_202401121517'),
-    bqsrc('d7ae08a2', 'ccdg_broad_ai_ibd_daly_louis_wes_20240104_ANV5_202403041042'),
-    bqsrc('9b04a16e', 'ccdg_broad_ai_ibd_daly_mccauley_gsa_20240113_ANV5_202402062137'),
-    bqsrc('b6a95447', 'ccdg_broad_ai_ibd_daly_mccauley_wes_20240104_ANV5_202403041049'),
-    bqsrc('df7a6188', 'ccdg_broad_ai_ibd_daly_mcgovern_gsa_20240118_ANV5_202402062140'),
-    bqsrc('5cd83e88', 'ccdg_broad_ai_ibd_daly_mcgovern_niddk_wes_20240104_ANV5_202403041057'),
-    bqsrc('fa7e066f', 'ccdg_broad_ai_ibd_daly_mcgovern_share_wes_20240104_ANV5_202401121556', pop),
-    bqsrc('2def0ed8', 'ccdg_broad_ai_ibd_daly_moayyedi_imagine_gsa_20240105_ANV5_202401121603'),
-    bqsrc('6e9fe586', 'ccdg_broad_ai_ibd_daly_moayyedi_imagine_wes_20240105_ANV5_202403041109'),
-    bqsrc('1f3dab2b', 'ccdg_broad_ai_ibd_daly_pekow_share_gsa_20240105_ANV5_202401121646'),
-    bqsrc('74869ac4', 'ccdg_broad_ai_ibd_daly_pekow_share_wes_20240105_ANV5_202403041133'),
-    bqsrc('d95b9a73', 'ccdg_broad_ai_ibd_niddk_daly_brant_wes_20240112_ANV5_202403041232'),
-    bqsrc('7a0883a4', 'ccdg_broad_cvd_af_pegasus_hmb_20221025_ANV5_202403030736'),
-    bqsrc('f62c5ebd', 'ccdg_broad_cvd_eocad_promis_wgs_20221213_ANV5_202403030935'),
-    bqsrc('9d116a5c', 'ccdg_broad_mi_atvb_ds_cvd_wes_20221025_ANV5_202403031035'),
-    bqsrc('bb315b29', 'ccdg_nygc_np_autism_tasc_wgs_20221024_ANV5_202403032216'),
-    bqsrc('33e3428b', 'ccdg_washu_cvd_np_ai_controls_vccontrols_wgs_20221024_ANV5_202403032319'),
-    bqsrc('17c5f983', 'cmg_broad_brain_engle_wgs_20221202_ANV5_202402290614'),
-    bqsrc('a46c0244', 'nhgri_broad_ibd_daly_kugathasan_wes_20240112_ANV5_202403041258'),
-    bqsrc('4b4f2325', 'nhgri_broad_ibd_daly_turner_wes_20240112_ANV5_202403041307'),
-]))
-
-anvil6_sources = mkdict(anvil5_sources, 249, mkdelta([
-    bqsrc('38af6304', '1000G_PRIMED_data_model_20240410_ANV5_202404101419'),
-    bqsrc('1a86e7ca', 'CCDG_Baylor_CVD_AFib_Groningen_WGS_20221122_ANV5_202304242224', pop),
-    bqsrc('92716a90', 'CCDG_Baylor_CVD_AFib_VAFAR_HMB_IRB_WGS_20221020_ANV5_202304211525', pop),
-    bqsrc('e8fc4258', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202403030358'),
-    bqsrc('77445496', 'CCDG_Baylor_CVD_EOCAD_BioMe_WGS_20221122_ANV5_202304242226', pop),
-    bqsrc('1b0d6b90', 'CCDG_Baylor_CVD_HHRC_Brownsville_GRU_WGS_20221122_ANV5_202304242228', pop),
-    bqsrc('373b7918', 'CCDG_Baylor_CVD_HemStroke_BNI_HMB_WGS_20221215_ANV5_202304242306', pop),
-    bqsrc('efc3e806', 'CCDG_Baylor_CVD_HemStroke_Duke_DS_WGS_20221117_ANV5_202304242122', pop),
-    bqsrc('1044f96d', 'CCDG_Baylor_CVD_HemStroke_ERICH_WGS_20221207_ANV5_202304271256', pop),
-    bqsrc('f23a6ec8', 'CCDG_Baylor_CVD_HemStroke_GERFHS_HMB_WGS_20221215_ANV5_202304242307', pop),
-    bqsrc('de34ca6e', 'CCDG_Baylor_CVD_HemStroke_Regards_DS_WGS_20221117_ANV5_202304242123', pop),
-    bqsrc('d9c6f406', 'CCDG_Baylor_CVD_HemStroke_Yale_HMB_WGS_20221215_ANV5_202304242309', pop),
-    bqsrc('56883e56', 'CCDG_Baylor_CVD_Oregon_SUDS_GRU_WGS_20221215_ANV5_202304242302', pop),
-    bqsrc('7f3ba7ec', 'CCDG_Baylor_CVD_TexGen_DS_WGS_20221117_ANV5_202304242125', pop),
-    bqsrc('da965e26', 'CCDG_Baylor_CVD_Ventura_Presto_GRU_IRB_WGS_20221117_ANV5_202304242127', pop),
-    bqsrc('40647d03', 'CCDG_Broad_AI_IBD_Brant_DS_IBD_WGS_20240113_ANV5_202401141252'),
-    bqsrc('83339911', 'CCDG_Broad_AI_IBD_Brant_HMB_WGS_20240113_ANV5_202401141259'),
-    bqsrc('3f36066b', 'CCDG_Broad_AI_IBD_Cho_WGS_20240113_ANV5_202403030543'),
-    bqsrc('65e890b6', 'CCDG_Broad_AI_IBD_Kugathasan_WGS_20240113_ANV5_202403030551'),
-    bqsrc('cec499cd', 'CCDG_Broad_AI_IBD_McCauley_WGS_20240114_ANV5_202403030559'),
-    bqsrc('8043de16', 'CCDG_Broad_AI_IBD_McGovern_WGS_20240113_ANV5_202403030608'),
-    bqsrc('de3bfd4e', 'CCDG_Broad_AI_IBD_Newberry_WGS_20240113_ANV5_202403030616'),
-    bqsrc('ed109b2f', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_Arrays_20230612_ANV5_202306131350', pop),
-    bqsrc('3d8b62d7', 'CCDG_Broad_CVD_AF_BioVU_HMB_GSO_WES_20221025_ANV5_202304241856', pop),
-    bqsrc('450ba911', 'CCDG_Broad_CVD_AF_ENGAGE_DS_WES_20230418_ANV5_202304210808', pop),
-    bqsrc('0768a322', 'CCDG_Broad_CVD_AF_Ellinor_MGH_Arrays_20221024_ANV5_202304211831', pop),
-    bqsrc('dfabf632', 'CCDG_Broad_CVD_AF_Ellinor_MGH_WES_20221117_ANV5_202304271354', pop),
-    bqsrc('485eb707', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_Arrays_20230128_ANV5_202304271554', pop),
-    bqsrc('58dffe5a', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_Arrays_20221103_ANV5_202304242105', pop),
-    bqsrc('cf7f2c0c', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_WES_20221103_ANV5_202304242107', pop),
-    bqsrc('f896734e', 'CCDG_Broad_CVD_AF_Marcus_UCSF_Arrays_20221102_ANV5_202304242039', pop),
-    bqsrc('40c2f4f4', 'CCDG_Broad_CVD_AF_Marcus_UCSF_WES_20221222_ANV5_202304242040', pop),
-    bqsrc('67117555', 'CCDG_Broad_CVD_AF_Rienstra_WES_20221222_ANV5_202304242035', pop),
-    bqsrc('c45dd622', 'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_Arrays_20221103_ANV5_202304242110', pop),
-    bqsrc('b12d2e52', 'CCDG_Broad_CVD_AF_Swiss_Cases_DS_MDS_WES_20230118_ANV5_202304242112', pop),
-    bqsrc('d795027d', 'CCDG_Broad_CVD_AF_VAFAR_Arrays_20221020_ANV5_202304211823', pop),
-    bqsrc('642829f3', 'CCDG_Broad_CVD_AF_VAFAR_WES_20221024_ANV5_202304211826', pop),
-    bqsrc('43f6230a', 'CCDG_Broad_CVD_AFib_AFLMU_WGS_20231008_ANV5_202310091911', pop),
-    bqsrc('2b135baf', 'CCDG_Broad_CVD_AFib_MGH_WGS_20221024_ANV5_202304211829', pop),
-    bqsrc('de64d25a', 'CCDG_Broad_CVD_AFib_UCSF_WGS_20221222_ANV5_202304242037', pop),
-    bqsrc('08216a2c', 'CCDG_Broad_CVD_AFib_Vanderbilt_Ablation_WGS_20221020_ANV5_202304211819', pop),
-    bqsrc('342c77f2', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_Arrays_20230517_ANV5_202312122054'),
-    bqsrc('a16f8bac', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_WES_20230621_ANV5_202403030943'),
-    bqsrc('f2179275', 'CCDG_Broad_CVD_EOCAD_TaiChi_WGS_20221026_ANV5_202403030955'),
-    bqsrc('e8ee6358', 'CCDG_Broad_CVD_EOCAD_VIRGO_WGS_20221024_ANV5_202403031003'),
-    bqsrc('383d9d9b', 'CCDG_Broad_CVD_PROMIS_GRU_WES_20230418_ANV5_202306211912', pop),
-    bqsrc('318ae48e', 'CCDG_Broad_CVD_Stroke_BRAVE_WGS_20221107_ANV5_202304241543', pop),
-    bqsrc('7ea7a6e9', 'CCDG_Broad_MI_BRAVE_GRU_WES_20221107_ANV5_202304241545', pop),
-    bqsrc('5df71da4', 'CCDG_Broad_MI_InStem_WES_20221122_ANV5_202304242236', pop),
-    bqsrc('1793828c', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSRS_WES_20230324_ANV5_202304241752', pop),
-    bqsrc('0db6105c', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202402020211'),
-    bqsrc('70c803d7', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_GSA_MD_20221117_ANV5_202304271400', pop),
-    bqsrc('1b92691d', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20221101_ANV5_202403031115'),
-    bqsrc('f5a4a895', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_GSA_MD_20221117_ANV5_202304271358', pop),
-    bqsrc('3da39a32', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20221101_ANV5_202403031123'),
-    bqsrc('b8b8ba44', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_GSA_MD_20221117_ANV5_202304271356', pop),
-    bqsrc('b3e42c63', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20221101_ANV5_202403031131'),
-    bqsrc('a2b20d71', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202402020256'),
-    bqsrc('f85048a3', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_GSA_MD_20221117_ANV5_202304271401', pop),
-    bqsrc('b3ef2bd3', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20221026_ANV5_202403031140'),
-    bqsrc('1cafba94', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202402020305'),
-    bqsrc('006c9286', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202402020314'),
-    bqsrc('92905a2b', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_GSA_MD_20221117_ANV5_202304271403', pop),
-    bqsrc('33e1bed9', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20221108_ANV5_202402020322'),
-    bqsrc('3f3ad5c7', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_GSA_MD_20230118_ANV5_202304271404', pop),
-    bqsrc('b2a5eccc', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20221027_ANV5_202403031148'),
-    bqsrc('7a7b911a', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20240201_ANV5_202402020339'),
-    bqsrc('33634ed0', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20240201_ANV5_202402020347'),
-    bqsrc('47f93bbb', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20230328_ANV5_202403031156'),
-    bqsrc('389af3b3', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_WES_20221102_ANV5_202403031205'),
-    bqsrc('ac8e01aa', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_WES_20230328_ANV5_202403031213'),
-    bqsrc('5d4aa202', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20221108_ANV5_202403031222'),
-    bqsrc('bd066b5a', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_WES_20230328_ANV5_202403031231'),
-    bqsrc('17de3c3b', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20240201_ANV5_202403031239'),
-    bqsrc('46e7e2ab', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_WES_20230328_ANV5_202403031247'),
-    bqsrc('ba863f29', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_WES_20221102_ANV5_202403031256'),
-    bqsrc('113d9969', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20221108_ANV5_202403031305'),
-    bqsrc('fd6d20c8', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_WES_20230328_ANV5_202403031313'),
-    bqsrc('55d32c1b', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20230328_ANV5_202403031322'),
-    bqsrc('844a1ecf', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_WES_20221114_ANV5_202403031331'),
-    bqsrc('1cbd28a5', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_WES_20230621_ANV5_202403031340'),
-    bqsrc('b8b0b663', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20221026_ANV5_202403031348'),
-    bqsrc('2686a76a', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20221027_ANV5_202403031405'),
-    bqsrc('05e028a4', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20221108_ANV5_202403031413'),
-    bqsrc('4a6228be', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_WES_20221122_ANV5_202403031421'),
-    bqsrc('98dddf8f', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_WES_20230328_ANV5_202403031430'),
-    bqsrc('9ed2a64a', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_WES_20230110_ANV5_202403031439'),
-    bqsrc('22a9e8bd', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_WES_20221114_ANV5_202403031446'),
-    bqsrc('517eda47', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_WES_20230328_ANV5_202403031454'),
-    bqsrc('b6e444c4', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_WES_20230223_ANV5_202403031503'),
-    bqsrc('d8145bea', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20221108_ANV5_202403031512'),
-    bqsrc('67c3b200', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20221027_ANV5_202403031520'),
-    bqsrc('4476c338', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20221108_ANV5_202403031529'),
-    bqsrc('5cd83a64', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_WES_20221114_ANV5_202403031537'),
-    bqsrc('5115b904', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20221220_ANV5_202403031547'),
-    bqsrc('f7fb0742', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20221027_ANV5_202402062057'),
-    bqsrc('b979e83a', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_WES_20230110_ANV5_202403031555'),
-    bqsrc('54571a90', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_WES_20230328_ANV5_202403031603'),
-    bqsrc('5495da63', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_WES_20230110_ANV5_202403031612'),
-    bqsrc('7275a9bd', 'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_WES_20221114_ANV5_202403031621'),
-    bqsrc('2c2a7d19', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20230328_ANV5_202403031629'),
-    bqsrc('edbd02ca', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20221108_ANV5_202403031637'),
-    bqsrc('225a7340', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20221027_ANV5_202403031645'),
-    bqsrc('97dadba8', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20240201_ANV5_202402020902'),
-    bqsrc('6dcb5d39', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20221027_ANV5_202403031701'),
-    bqsrc('fb4ac7d8', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_WES_20221215_ANV5_202402062059'),
-    bqsrc('5de241b3', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_WES_20221207_ANV5_202403031709'),
-    bqsrc('62a84074', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_WES_20221116_ANV5_202403031719'),
-    bqsrc('7c06247a', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20230612_ANV5_202402062101'),
-    bqsrc('9042eb4a', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20221027_ANV5_202403031727'),
-    bqsrc('cb75258b', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20221027_ANV5_202403031735'),
-    bqsrc('744bc858', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20221027_ANV5_202403031744'),
-    bqsrc('faff5b2b', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20221110_ANV5_202403031752'),
-    bqsrc('275b2a46', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_WES_20221205_ANV5_202403031800'),
-    bqsrc('5a548fd8', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20230328_ANV5_202403031809'),
-    bqsrc('999301d3', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20230126_ANV5_202402021048'),
-    bqsrc('eda3f720', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_WES_20221207_ANV5_202403031817'),
-    bqsrc('d9e55ea0', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_WES_20221207_ANV5_202403031826'),
-    bqsrc('6a627e94', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_WES_20221215_ANV5_202403031834'),
-    bqsrc('bfa59a11', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_WES_20240201_ANV5_202403031842'),
-    bqsrc('f8d5318a', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20230131_ANV5_202402021131'),
-    bqsrc('4ef1d979', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_WES_20230612_ANV5_202402021139'),
-    bqsrc('5e00a0df', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20221108_ANV5_202402062105'),
-    bqsrc('10948836', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_NPU_WES_20221114_ANV5_202403031858'),
-    bqsrc('0a247e9e', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20230328_ANV5_202403031906'),
-    bqsrc('154b4ef8', 'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20230328_ANV5_202403031915'),
-    bqsrc('07b8d88c', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_WES_20221207_ANV5_202402021226'),
-    bqsrc('1985a01d', 'CCDG_Broad_Spalletta_HMB_NPU_MDS_WES_20221102_ANV5_202403031942'),
-    bqsrc('ad61c47e', 'CCDG_NHGRI_Broad_ASD_Daly_phs000298_WES_vcf_20230403_ANV5_202304271610', pop),
-    bqsrc('5e719362', 'CCDG_NYGC_AI_Asthma_Gala2_WGS_20230605_ANV5_202306131248', pop),
-    bqsrc('2734a0e4', 'CCDG_NYGC_NP_Alz_EFIGA_WGS_20230605_ANV5_202306141705', pop),
-    bqsrc('710fc60d', 'CCDG_NYGC_NP_Alz_LOAD_WGS_20230605_ANV5_202306131256', pop),
-    bqsrc('9626b3eb', 'CCDG_NYGC_NP_Alz_WHICAP_WGS_20230605_ANV5_202306131303', pop),
-    bqsrc('86bb81c0', 'CCDG_NYGC_NP_Autism_ACE2_GRU_MDS_WGS_20230605_ANV5_202403032029'),
-    bqsrc('85674dce', 'CCDG_NYGC_NP_Autism_AGRE_WGS_20230605_ANV5_202403081651'),
-    bqsrc('7d1461b2', 'CCDG_NYGC_NP_Autism_SSC_WGS_20230605_ANV5_202403032206'),
-    bqsrc('25ec7b57', 'CCDG_WASHU_PAGE_20221220_ANV5_202304271544', pop),
-    bqsrc('15645b8d', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_DS_WGS_20230525_ANV5_202403040118'),
-    bqsrc('4a0769c7', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202403040126'),
-    bqsrc('b9222139', 'CMG_BROAD_BRAIN_ENGLE_WES_20240205_ANV5_202402051624', pop),
-    bqsrc('7e094253', 'CMG_BROAD_BRAIN_SHERR_WGS_20221102_ANV5_202402281543', pop),
-    bqsrc('c797490f', 'CMG_BROAD_ORPHAN_SCOTT_WGS_20221102_ANV5_202402281552', pop),
-    bqsrc('0a21cbfd', 'CMG_BaylorHopkins_HMB_IRB_NPU_WES_20221020_ANV5_202402290528'),
-    bqsrc('d321333c', 'CMG_BaylorHopkins_HMB_NPU_WES_20230525_ANV5_202402290537'),
-    bqsrc('0a1360b1', 'CMG_Broad_Blood_Gazda_WES_20221117_ANV5_202402290547', pop),
-    bqsrc('faa71b49', 'CMG_Broad_Blood_Sankaran_WES_20221117_ANV5_202402290555', pop),
-    bqsrc('abce6387', 'CMG_Broad_Blood_Sankaran_WGS_20221117_ANV5_202402290606', pop),
-    bqsrc('3dd4d039', 'CMG_Broad_Brain_Gleeson_WES_20221117_ANV5_202304241517', pop),
-    bqsrc('c361373f', 'CMG_Broad_Brain_Muntoni_WES_20221102_ANV5_202304241527', pop),
-    bqsrc('fc6ce406', 'CMG_Broad_Brain_NeuroDev_WES_20240112_ANV5_202401152208'),
-    bqsrc('d7bfafc6', 'CMG_Broad_Brain_Thaker_WES_20221102_ANV5_202304241531', pop),
-    bqsrc('7e03b5fd', 'CMG_Broad_Brain_Walsh_WES_20230605_ANV5_202310101734', pop),
-    bqsrc('29812b42', 'CMG_Broad_Eye_Pierce_WES_20221205_ANV5_202304242250', pop),
-    bqsrc('48134558', 'CMG_Broad_Eye_Pierce_WGS_20221117_ANV5_202304241507', pop),
-    bqsrc('36ebaa12', 'CMG_Broad_Heart_PCGC_Tristani_WGS_20221025_ANV5_202304211840', pop),
-    bqsrc('f9826139', 'CMG_Broad_Heart_Seidman_WES_20221117_ANV5_202304241504', pop),
-    bqsrc('85952af8', 'CMG_Broad_Kidney_Hildebrandt_WES_20230525_ANV5_202305251733', pop),
-    bqsrc('ee4ae9a1', 'CMG_Broad_Kidney_Hildebrandt_WGS_20221025_ANV5_202304211844', pop),
-    bqsrc('cf168274', 'CMG_Broad_Kidney_Pollak_WES_20221025_ANV5_202304211846', pop),
-    bqsrc('4d47ba2c', 'CMG_Broad_Muscle_Beggs_WGS_20221102_ANV5_202304241533', pop),
-    bqsrc('82d1271a', 'CMG_Broad_Muscle_Bonnemann_WES_20221117_ANV5_202304241509', pop),
-    bqsrc('6be3fb25', 'CMG_Broad_Muscle_Bonnemann_WGS_20221117_ANV5_202304241510', pop),
-    bqsrc('b168eb10', 'CMG_Broad_Muscle_KNC_WES_20221116_ANV5_202304242219', pop),
-    bqsrc('372244aa', 'CMG_Broad_Muscle_KNC_WGS_20221117_ANV5_202304242221', pop),
-    bqsrc('c43e7400', 'CMG_Broad_Muscle_Kang_WES_20230525_ANV5_202310101649', pop),
-    bqsrc('77a6c0aa', 'CMG_Broad_Muscle_Kang_WGS_20221025_ANV5_202304211849', pop),
-    bqsrc('4153ad1f', 'CMG_Broad_Muscle_Laing_WES_20221208_ANV5_202402291926', pop),
-    bqsrc('5019143b', 'CMG_Broad_Muscle_Myoseq_WES_20230621_ANV5_202306211852', pop),
-    bqsrc('27eb651a', 'CMG_Broad_Muscle_Myoseq_WGS_20221208_ANV5_202304271310', pop),
-    bqsrc('c087af7a', 'CMG_Broad_Muscle_OGrady_WES_20221205_ANV5_202304242252', pop),
-    bqsrc('db987a2e', 'CMG_Broad_Muscle_Ravenscroft_WES_20221208_ANV5_202304271311', pop),
-    bqsrc('05df566c', 'CMG_Broad_Muscle_Topf_WES_20221208_ANV5_202304271313', pop),
-    bqsrc('87d91f06', 'CMG_Broad_Orphan_Chung_WES_20221102_ANV5_202304241534', pop),
-    bqsrc('25f6b696', 'CMG_Broad_Orphan_Estonia_Ounap_WES_20221117_ANV5_202304241512', pop),
-    bqsrc('c3b16b41', 'CMG_Broad_Orphan_Estonia_Ounap_WGS_20221205_ANV5_202304242255', pop),
-    bqsrc('5bbb5a28', 'CMG_Broad_Orphan_Jueppner_WES_20240205_ANV5_202402051640', pop),
-    bqsrc('32fe2260', 'CMG_Broad_Orphan_Lerner_Ellis_WES_20221102_ANV5_202304241536', pop),
-    bqsrc('6f9e574e', 'CMG_Broad_Orphan_Manton_WES_20221117_ANV5_202304241513', pop),
-    bqsrc('53cd689b', 'CMG_Broad_Orphan_Manton_WGS_20221117_ANV5_202304241515', pop),
-    bqsrc('e7c5babf', 'CMG_Broad_Orphan_Scott_WES_20221025_ANV5_202304241458', pop),
-    bqsrc('051877f4', 'CMG_Broad_Orphan_Sweetser_WES_20221102_ANV5_202304241539', pop),
-    bqsrc('555c7706', 'CMG_Broad_Orphan_VCGS_White_WES_20221018_ANV5_202304241522', pop),
-    bqsrc('3a8f7952', 'CMG_Broad_Orphan_VCGS_White_WGS_20221117_ANV5_202304241523', pop),
-    bqsrc('b699c5e3', 'CMG_Broad_Rare_RGP_WES_20221102_ANV5_202304241540', pop),
-    bqsrc('2d5bd095', 'CMG_Broad_Stillbirth_Wilkins_Haug_WES_20221102_ANV5_202304241542', pop),
-    bqsrc('db7353fb', 'CMG_UWASH_DS_BAV_IRB_PUB_RD_20230419_ANV5_202304201858', pop),
-    bqsrc('3b8ef67a', 'CMG_UWASH_DS_BDIS_20230418_ANV5_202304201958', pop),
-    bqsrc('5d27ebfe', 'CMG_UWASH_DS_HFA_20230418_ANV5_202304201932', pop),
-    bqsrc('9d1a6e0a', 'CMG_UWASH_DS_NBIA_20230418_ANV5_202304201949', pop),
-    bqsrc('18bd3df4', 'CMG_UWASH_HMB_20230418_ANV5_202402070029', pop),
-    bqsrc('50484f86', 'CMG_UWASH_HMB_IRB_20230418_ANV5_202304201915', pop),
-    bqsrc('74bd0964', 'CMG_UWash_DS_EP_20230419_ANV5_202304201906', pop),
-    bqsrc('6f4155f2', 'CMG_UWash_GRU_20240301_ANV5_202403040330', pop),
-    bqsrc('6486ae96', 'CMG_UWash_GRU_1_20240113_ANV5_202401141440', pop),
-    bqsrc('97ec5366', 'CMG_UWash_GRU_IRB_20230418_ANV5_202304201940', pop),
-    bqsrc('cb305c8e', 'CMG_YALE_DS_MC_20221026_ANV5_202402281611'),
-    bqsrc('c2897355', 'CMG_Yale_DS_BPEAKD_20240113_ANV5_202401141447'),
-    bqsrc('4b5667f8', 'CMG_Yale_DS_RD_20240113_ANV5_202401141453'),
-    bqsrc('9e86cb23', 'CMG_Yale_DS_THAL_IRB_20240113_ANV5_202401141500'),
-    bqsrc('278252c3', 'CMG_Yale_HMB_IRB_20240113_ANV5_202401141507'),
-    bqsrc('eea2a20c', 'CMH_GAFK_10X_Genomics_20240304_ANV5_202403071539'),
-    bqsrc('0e0bf0f8', 'CMH_GAFK_ES_20240301_ANV5_202403040338'),
-    bqsrc('9935aa3f', 'CMH_GAFK_IlluminaGSA_20240311_ANV5_202403121355'),
-    bqsrc('d391ce5f', 'CMH_GAFK_IsoSeq_20240113_ANV5_202402062116'),
-    bqsrc('beef6734', 'CMH_GAFK_MGI_20240304_ANV5_202403071559'),
-    bqsrc('8599b1fb', 'CMH_GAFK_PacBio_methyl_tagged_20240311_ANV5_202403121402'),
-    bqsrc('94f58e6c', 'CMH_GAFK_SCATAC_20221107_ANV5_202402290954'),
-    bqsrc('5447de30', 'CMH_GAFK_WGBS_20230327_ANV5_202402062120'),
-    bqsrc('db73a316', 'CMH_GAFK_WGS_20240113_ANV5_202402062123'),
-    bqsrc('5227851b', 'CSER_ClinSeq_GRU_20240401_ANV5_202404081541'),
-    bqsrc('1a706b0c', 'GTEx_Somatic_WGS_20240116_ANV5_202401170147', pop),
-    bqsrc('8a98bcb4', 'NIMH_Broad_ConvNeuro_McCarroll_Nehme_Levy_CIRM_DS_Village_20240405_ANV5_202404081511'),
-    bqsrc('c02a5efb', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_VillageData_20230109_ANV5_202402292203'),
-    bqsrc('817f27aa', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20240206_ANV5_202402081755'),
-    bqsrc('ddc1d72b', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_Finkel_SMA_DS_WGS_20230109_ANV5_202402292209'),
-    bqsrc('14f5afa3', 'NIMH_Broad_WGSPD1_McCarroll_Braff_DS_10XLRGenomes_20221115_ANV5_202310101713', pop),
-    bqsrc('69e4bc19', 'NIMH_Broad_WGSPD1_McCarroll_COGS_DS_WGS_20240113_ANV5_202401152215'),
-    bqsrc('da595e23', 'NIMH_Broad_WGSPD1_McCarroll_Escamilla_DS_WGS_20240112_ANV5_202401141541'),
-    bqsrc('94091a22', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_10XLRGenomes_20230331_ANV5_202310101715', pop),
-    bqsrc('df20901c', 'NIMH_Broad_WGSPD_1_McCarroll_Braff_DS_WGS_20240304_ANV5_202403071610'),
-    bqsrc('75e17b99', 'NIMH_CIRM_FCDI_ConvergentNeuro_McCarroll_Eggan_GRU_Arrays_20230109_ANV5_202402292215'),
-    bqsrc('25790186', 'PAGE_BioMe_GRU_WGS_20221128_ANV5_202403040429', pop),
-    bqsrc('b371989b', 'PAGE_MEC_GRU_WGS_20230131_ANV5_202403040437', pop),
-    bqsrc('4a4eec27', 'PAGE_SoL_HMB_WGS_20221220_ANV5_202403040445', pop),
-    bqsrc('a1f917db', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20221128_ANV5_202403040453', pop),
-    bqsrc('6264931f', 'PAGE_WHI_HMB_IRB_WGS_20221019_ANV5_202403040500', pop),
-    bqsrc('f3817357', 'ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome_20221102_ANV5_202403040528'),
-    bqsrc('23635d1c', 'ccdg_asc_ndd_daly_talkowski_IBIS_asd_exome_20221024_ANV5_202403040537'),
-    bqsrc('ecf311e7', 'ccdg_asc_ndd_daly_talkowski_TASC_asd_exome_20221117_ANV5_202403040544'),
-    bqsrc('90923a9d', 'ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome_20231013_ANV5_202403040600'),
-    bqsrc('2354d65a', 'ccdg_asc_ndd_daly_talkowski_cdcseed_asd_gsa_md_20221024_ANV5_202402291144', pop),
-    bqsrc('efc0eb70', 'ccdg_asc_ndd_daly_talkowski_gargus_asd_exome_20231013_ANV5_202403040645'),
-    bqsrc('d1f95953', 'ccdg_asc_ndd_daly_talkowski_gurrieri_asd_exome_20221024_ANV5_202402291153'),
-    bqsrc('5590427b', 'ccdg_asc_ndd_daly_talkowski_mayo_asd_exome_20221024_ANV5_202402291115'),
-    bqsrc('3cbe3dd3', 'ccdg_asc_ndd_daly_talkowski_mcpartland_asd_exome_20221116_ANV5_202403040753'),
-    bqsrc('a245d786', 'ccdg_asc_ndd_daly_talkowski_palotie_asd_exome_20221019_ANV5_202403040815', pop),
-    bqsrc('104705f5', 'ccdg_asc_ndd_daly_talkowski_passos_bueno_asd_exome_20221108_ANV5_202403040831'),
-    bqsrc('a07262c0', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_exome__20221025_ANV5_202403040839'),
-    bqsrc('418e64c1', 'ccdg_asc_ndd_daly_talkowski_persico_asd_exome_20221027_ANV5_202403040854'),
-    bqsrc('cfe20662', 'ccdg_asc_ndd_daly_talkowski_renieri_asd_exome_20230327_ANV5_202403040909'),
-    bqsrc('7c668a5c', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_exome_20230324_ANV5_202403040917'),
-    bqsrc('0b4c3cfb', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_gsa_md_20221025_ANV5_202402291202', pop),
-    bqsrc('2571477f', 'ccdg_broad_ai_ibd_daly_burnstein_gsa_20240103_ANV5_202401112154'),
-    bqsrc('c0abacf6', 'ccdg_broad_ai_ibd_daly_chen_gsa_20240103_ANV5_202401112202'),
-    bqsrc('c7473b33', 'ccdg_broad_ai_ibd_daly_chen_wes_20240103_ANV5_202403040940'),
-    bqsrc('ac30439c', 'ccdg_broad_ai_ibd_daly_cho_niddk_gsa_20240103_ANV5_202401112215'),
-    bqsrc('267ea46f', 'ccdg_broad_ai_ibd_daly_chung_gider_gsa_20240103_ANV5_202401121413'),
-    bqsrc('c481c20f', 'ccdg_broad_ai_ibd_daly_chung_gider_wes_20240103_ANV5_202403040947'),
-    bqsrc('938f9e89', 'ccdg_broad_ai_ibd_daly_faubion_share_gsa_20240104_ANV5_202401121427'),
-    bqsrc('d4b1264d', 'ccdg_broad_ai_ibd_daly_faubion_share_wes_20240104_ANV5_202403040954'),
-    bqsrc('4d149951', 'ccdg_broad_ai_ibd_daly_franchimont_gsa_20240104_ANV5_202401121441'),
-    bqsrc('e12ce5bd', 'ccdg_broad_ai_ibd_daly_franchimont_wes_20240104_ANV5_202403041001'),
-    bqsrc('2c7e5905', 'ccdg_broad_ai_ibd_daly_hyams_protect_gsa_20240311_ANV5_202403121623'),
-    bqsrc('f5463526', 'ccdg_broad_ai_ibd_daly_kastner_fmf_gsa_20240104_ANV5_202401121503'),
-    bqsrc('51367192', 'ccdg_broad_ai_ibd_daly_kastner_fmf_nhgri_wes_20240104_ANV5_202401152230'),
-    bqsrc('7268c3a0', 'ccdg_broad_ai_ibd_daly_kupcinskas_gsa_20240311_ANV5_202403121627'),
-    bqsrc('51449a60', 'ccdg_broad_ai_ibd_daly_lira_share_wes_20240104_ANV5_202403041035'),
-    bqsrc('ee1b3121', 'ccdg_broad_ai_ibd_daly_louis_gsa_20240311_ANV5_202403121633'),
-    bqsrc('083044ec', 'ccdg_broad_ai_ibd_daly_newberry_share_gsa_20240105_ANV5_202401121611'),
-    bqsrc('10ae29e5', 'ccdg_broad_ai_ibd_daly_newberry_share_wes_20240105_ANV5_202403041117'),
-    bqsrc('a240ffda', 'ccdg_broad_ai_ibd_daly_niddk_cho_wes_20240105_ANV5_202403041125'),
-    bqsrc('929acb2a', 'ccdg_broad_ai_ibd_daly_rioux_bitton_igenomed_wes_20240105_ANV5_202401121701'),
-    bqsrc('fa70ba86', 'ccdg_broad_ai_ibd_daly_rioux_genizon_wes_20240311_ANV5_202403121426'),
-    bqsrc('6e9030de', 'ccdg_broad_ai_ibd_daly_rioux_igenomed_gsa_20240105_ANV5_202401121709'),
-    bqsrc('c9265cf7', 'ccdg_broad_ai_ibd_daly_rioux_niddk_gsa_20240108_ANV5_202401121716'),
-    bqsrc('fe283248', 'ccdg_broad_ai_ibd_daly_rioux_niddk_wes_20240108_ANV5_202403041140'),
-    bqsrc('3ca098f3', 'ccdg_broad_ai_ibd_daly_sands_msccr_gsa_20240108_ANV5_202401121730'),
-    bqsrc('fd47ae7f', 'ccdg_broad_ai_ibd_daly_sands_msccr_wes_20240108_ANV5_202403041148'),
-    bqsrc('4300fbc6', 'ccdg_broad_ai_ibd_daly_silverberg_niddk_gsa_20240108_ANV5_202401121745'),
-    bqsrc('14285871', 'ccdg_broad_ai_ibd_daly_stampfer_nhs_gsa_20240311_ANV5_202403121637'),
-    bqsrc('d69ac752', 'ccdg_broad_ai_ibd_daly_stampfer_wes_20240108_ANV5_202403041155'),
-    bqsrc('268dabf8', 'ccdg_broad_ai_ibd_daly_vermeire_gsa_20240113_ANV5_202402062145'),
-    bqsrc('636bc565', 'ccdg_broad_ai_ibd_daly_vermeire_wes_20240108_ANV5_202403041203'),
-    bqsrc('7cc92556', 'ccdg_broad_ai_ibd_daly_xavier_prism_gsa_20240108_ANV5_202402062149'),
-    bqsrc('6b12cac1', 'ccdg_broad_ai_ibd_daly_xavier_prism_wes_20240108_ANV5_202403041214'),
-    bqsrc('5d4e150c', 'ccdg_broad_ai_ibd_daly_xavier_share_gsa_20240108_ANV5_202401121819'),
-    bqsrc('e30e7797', 'ccdg_broad_ai_ibd_daly_xavier_share_wes_20240108_ANV5_202403041224'),
-    bqsrc('597e5f25', 'ccdg_broad_ai_ibd_niddk_daly_duerr_wes_20240112_ANV5_202403041241'),
-    bqsrc('2f8b185b', 'ccdg_broad_ai_ibd_niddk_daly_silverberg_wes_20240112_ANV5_202403041250'),
-    bqsrc('7a0883a4', 'ccdg_broad_cvd_af_pegasus_hmb_20221025_ANV5_202403030736', pop),
-    bqsrc('f62c5ebd', 'ccdg_broad_cvd_eocad_promis_wgs_20221213_ANV5_202403030935', pop),
-    bqsrc('9d116a5c', 'ccdg_broad_mi_atvb_ds_cvd_wes_20221025_ANV5_202403031035', pop),
-    bqsrc('6c0a5f0d', 'ccdg_broad_mi_univutah_ds_cvd_wes_20221026_ANV5_202403031059'),
-    bqsrc('235663ab', 'ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes_20221101_ANV5_202403031924'),
-    bqsrc('81cf50b1', 'ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes_20221026_ANV5_202403031933'),
-    bqsrc('e6801146', 'ccdg_nygc_np_autism_hmca_wgs_20221024_ANV5_202403032115'),
-    bqsrc('64b26798', 'ccdg_washu_ai_t1d_t1dgc_wgs_20221031_ANV5_202403032311'),
-    bqsrc('e3065356', 'ccdg_washu_cvd_eocad_biome_wgs_20221024_ANV5_202304211601', pop),
-    bqsrc('01e3396c', 'ccdg_washu_cvd_eocad_cleveland_wgs_20221024_ANV5_202403040008'),
-    bqsrc('5e62ca4f', 'ccdg_washu_cvd_eocad_emerge_wgs_20221024_ANV5_202403040026'),
-    bqsrc('a0d77559', 'ccdg_washu_cvd_eocad_emory_wgs_20221024_ANV5_202403040034'),
-    bqsrc('33e3428b', 'ccdg_washu_cvd_np_ai_controls_vccontrols_wgs_20221024_ANV5_202403032319', pop),
-    bqsrc('17c5f983', 'cmg_broad_brain_engle_wgs_20221202_ANV5_202402290614', pop),
-    bqsrc('1cb73890', 'cmg_broad_heart_ware_wes_20221215_ANV5_202304242145', pop),
-    bqsrc('833ff0a3', 'eMERGE_GRU_IRB_NPU_eMERGEseq_20230130_ANV5_202304271614', pop),
-    bqsrc('baf040af', 'eMERGE_GRU_IRB_PUB_NPU_eMERGEseq_20230130_ANV5_202304271616', pop),
-    bqsrc('270b3b62', 'eMERGE_GRU_IRB_eMERGEseq_20230130_ANV5_202304271613', pop),
-    bqsrc('c13efbe9', 'eMERGE_GRU_NPU_eMERGEseq_20230130_ANV5_202304271617', pop),
-    bqsrc('34f8138d', 'eMERGE_GRU_eMERGEseq_20230130_ANV5_202304271612', pop),
-    bqsrc('90b7b6e8', 'eMERGE_HMB_GSO_eMERGEseq_20230130_ANV5_202304271621', pop),
-    bqsrc('6e6dca92', 'eMERGE_HMB_IRB_PUB_eMERGEseq_20230130_ANV5_202304271622', pop),
-    bqsrc('1ddf2a8e', 'eMERGE_HMB_NPU_eMERGEseq_20230130_ANV5_202304271624', pop),
-    bqsrc('dba97a65', 'eMERGE_HMB_eMERGEseq_20230130_ANV5_202304271619', pop),
-    bqsrc('51aa9a22', 'eMERGE_PGRNseq_20230118_ANV5_202304241853', pop),
-    bqsrc('ce8c469f', 'eMERGE_PRS_Arrays_20221220_ANV5_202304271346', pop),
-    bqsrc('bf91a039', 'nhgri_broad_ibd_daly_winter_wes_20240112_ANV5_202403041315'),
-]))
-
-anvil7_sources = mkdict(anvil6_sources, 256, mkdelta([
-    bqsrc('c9e438dc', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20221026_ANV5_202406261957'),
-    bqsrc('90a1d452', 'GREGoR_R01_GRU_20240208_ANV5_202407011515'),
-    bqsrc('c27c13db', 'GREGoR_R01_HMB_20240208_ANV5_202407011529'),
-    bqsrc('3594cc06', 'HPRC_20240401_ANV5_202406261913'),
-    bqsrc('49f55ff6', 'NIMH_Broad_WGSPD1_McCarroll_Light_DS_WGS_20240625_ANV5_202406262032'),
-    bqsrc('54040f7f', 'T2T_CHRY_20240301_ANV5_202406271432'),
-    bqsrc('5048eadd', 'ccdg_broad_ai_ibd_daly_brant_burnstein_utsw_wes_20240627_ANV5_202406271535'),
-    bqsrc('5d003f44', 'ccdg_broad_daly_igsr_1kg_twist_wes_20240625_ANV5_202406261904')
-]))
-
-anvil8_sources = mkdict(anvil7_sources, 254, mkdelta([
-    bqsrc('6fd2f543', '1000G_PRIMED_data_model_20240410_ANV5_202409251724'),
-    bqsrc('13858a9f', '1000G_high_coverage_2019_20230517_ANV5_202409231755'),
-    bqsrc('f954ce44', 'African_American_Seq_HGV_20230727_ANV5_202409251735'),
-    bqsrc('1c288bc8', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202409231808'),
-    bqsrc('69a5161a', 'CCDG_Broad_AI_IBD_Brant_DS_IBD_WGS_20240113_ANV5_202409302325'),
-    bqsrc('87ae3152', 'CCDG_Broad_AI_IBD_Brant_HMB_WGS_20240113_ANV5_202410011417'),
-    bqsrc('0de3f19d', 'CCDG_Broad_AI_IBD_Cho_WGS_20240113_ANV5_202409261925'),
-    bqsrc('183d3f73', 'CCDG_Broad_AI_IBD_Kugathasan_WGS_20240113_ANV5_202409261935'),
-    bqsrc('f82d1472', 'CCDG_Broad_AI_IBD_McGovern_WGS_20240113_ANV5_202409262009'),
-    bqsrc('a173fc34', 'CCDG_Broad_AI_IBD_Newberry_WGS_20240113_ANV5_202409262020'),
-    bqsrc('b0ce674b', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_Arrays_20230517_ANV5_202410011428'),
-    bqsrc('f88712d7', 'CCDG_Broad_CVD_EOCAD_PartnersBiobank_HMB_WES_20230621_ANV5_202409262029'),
-    bqsrc('948779f6', 'CCDG_Broad_CVD_EOCAD_TaiChi_WGS_20221026_ANV5_202409251741'),
-    bqsrc('ed6900d9', 'CCDG_Broad_CVD_EOCAD_VIRGO_WGS_20221024_ANV5_202409251751'),
-    bqsrc('38d33c51', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202410011452'),
-    bqsrc('a6afe2df', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20221101_ANV5_202409262047'),
-    bqsrc('9bf401b1', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20221101_ANV5_202409262056'),
-    bqsrc('573b1bec', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20221101_ANV5_202409262105'),
-    bqsrc('eeaf2d1a', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202410011503'),
-    bqsrc('3b3be681', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20221026_ANV5_202409262116'),
-    bqsrc('b8d6b994', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202410011513'),
-    bqsrc('49cabb98', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202410011523'),
-    bqsrc('afcff545', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20221108_ANV5_202410011533'),
-    bqsrc('2df70f51', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20221027_ANV5_202409262125'),
-    bqsrc('ab5c8456', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20240201_ANV5_202410011544'),
-    bqsrc('a1b27d24', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20240201_ANV5_202410011554'),
-    bqsrc('3f080a87', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20230328_ANV5_202409262134'),
-    bqsrc('465bfaac', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_WES_20221102_ANV5_202409262144'),
-    bqsrc('ccb8a4b7', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_WES_20230328_ANV5_202409301706'),
-    bqsrc('015bb538', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20221108_ANV5_202409262231'),
-    bqsrc('435613ab', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_WES_20230328_ANV5_202409262240'),
-    bqsrc('2f73c7c1', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20240201_ANV5_202409262249'),
-    bqsrc('3841aefa', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_WES_20230328_ANV5_202409262258'),
-    bqsrc('99470817', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_WES_20221102_ANV5_202409262308'),
-    bqsrc('46e142ab', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20221108_ANV5_202409262318'),
-    bqsrc('2648f51f', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_WES_20230328_ANV5_202409262327'),
-    bqsrc('b749c687', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20230328_ANV5_202409262337'),
-    bqsrc('5d23c09d', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_WES_20221114_ANV5_202409262346'),
-    bqsrc('548a0b21', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_WES_20230621_ANV5_202409262355'),
-    bqsrc('7a345902', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20221026_ANV5_202409270005'),
-    bqsrc('9f144aec', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20221026_ANV5_202409251801'),
-    bqsrc('813188f4', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20221027_ANV5_202409301252'),
-    bqsrc('dca2300f', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20221108_ANV5_202409301302'),
-    bqsrc('6f0e0649', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_WES_20221122_ANV5_202409301311'),
-    bqsrc('6d3907bb', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_WES_20230328_ANV5_202409301321'),
-    bqsrc('636e501f', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_WES_20230110_ANV5_202409231955'),
-    bqsrc('5271045a', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_WES_20221114_ANV5_202409301332'),
-    bqsrc('941ca2d1', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_WES_20230328_ANV5_202409301342'),
-    bqsrc('d6a4eda2', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_WES_20230223_ANV5_202409301352'),
-    bqsrc('174f3d1e', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20221108_ANV5_202409301402'),
-    bqsrc('6cb9a7a7', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20221027_ANV5_202409301413'),
-    bqsrc('87e7f1b9', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20221108_ANV5_202409301423'),
-    bqsrc('cbfb79d8', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_WES_20221114_ANV5_202409301433'),
-    bqsrc('9bbfe25c', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20221220_ANV5_202409301444'),
-    bqsrc('d4950205', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20221027_ANV5_202410011604'),
-    bqsrc('12cc5629', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_WES_20230110_ANV5_202409301453'),
-    bqsrc('b119a402', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_WES_20230328_ANV5_202409301503'),
-    bqsrc('7354f3d2', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_WES_20230110_ANV5_202409301514'),
-    bqsrc('00271874', 'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_WES_20221114_ANV5_202409301526'),
-    bqsrc('d0749ece', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20230328_ANV5_202409301537'),
-    bqsrc('52be6def', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20221108_ANV5_202409301547'),
-    bqsrc('8629a23a', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20221027_ANV5_202409301557'),
-    bqsrc('c6b049b2', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20240201_ANV5_202410011615'),
-    bqsrc('1b2e88a4', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20221027_ANV5_202409301607'),
-    bqsrc('798646a1', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_WES_20221215_ANV5_202410011625'),
-    bqsrc('83ec96c3', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_WES_20221207_ANV5_202409301617'),
-    bqsrc('1e1218b7', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_WES_20221116_ANV5_202409301627'),
-    bqsrc('e421074a', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20230612_ANV5_202410011634'),
-    bqsrc('a7b2b8bc', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20221027_ANV5_202409232006'),
-    bqsrc('9dc6e713', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20221027_ANV5_202409232016'),
-    bqsrc('69531ad0', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20221027_ANV5_202409232025'),
-    bqsrc('a1fbb513', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20221110_ANV5_202409301638'),
-    bqsrc('797b067f', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_WES_20221205_ANV5_202409301648'),
-    bqsrc('acd1fc6d', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20230328_ANV5_202409301657'),
-    bqsrc('e44dfa03', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20230126_ANV5_202410011646'),
-    bqsrc('5521223c', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_WES_20221207_ANV5_202409302000'),
-    bqsrc('c5ca49db', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_WES_20221207_ANV5_202409302009'),
-    bqsrc('8a9bc88a', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_WES_20221215_ANV5_202409302018'),
-    bqsrc('302dbf9e', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_WES_20240201_ANV5_202409302028'),
-    bqsrc('2d22bf8b', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20230131_ANV5_202410011657'),
-    bqsrc('23486b33', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_WES_20230612_ANV5_202410011708'),
-    bqsrc('76142d3b', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20221108_ANV5_202410011719'),
-    bqsrc('3e1c2a3e', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_NPU_WES_20221114_ANV5_202409302037'),
-    bqsrc('fd6ee483', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20230328_ANV5_202409302046'),
-    bqsrc('416b4095', 'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20230328_ANV5_202409302055'),
-    bqsrc('a8099f9d', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_WES_20221207_ANV5_202410011732'),
-    bqsrc('7529b6b1', 'CCDG_Broad_Spalletta_HMB_NPU_MDS_WES_20221102_ANV5_202409232034'),
-    bqsrc('27e59539', 'CCDG_NYGC_NP_Autism_ACE2_DS_MDS_WGS_20230605_ANV5_202409302125'),
-    bqsrc('37c78fc4', 'CCDG_NYGC_NP_Autism_ACE2_GRU_MDS_WGS_20230605_ANV5_202409232043'),
-    bqsrc('e3ebc7f0', 'CCDG_NYGC_NP_Autism_AGRE_WGS_20230605_ANV5_202410011742'),
-    bqsrc('1c1b8f44', 'CCDG_NYGC_NP_Autism_CAG_DS_WGS_20230605_ANV5_202409232053'),
-    bqsrc('f32ef49b', 'CCDG_NYGC_NP_Autism_HFA_DS_WGS_20230605_ANV5_202409232102'),
-    bqsrc('82575f4a', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_DS_WGS_20221103_ANV5_202409232112'),
-    bqsrc('25858a7b', 'CCDG_NYGC_NP_Autism_PELPHREY_ACE_GRU_WGS_20221103_ANV5_202409241351'),
-    bqsrc('8302ff1f', 'CCDG_NYGC_NP_Autism_SAGE_WGS_20230605_ANV5_202409302144'),
-    bqsrc('443d8d20', 'CCDG_NYGC_NP_Autism_SSC_WGS_20230605_ANV5_202409302154'),
-    bqsrc('660280f8', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_DS_WGS_20230525_ANV5_202409302255'),
-    bqsrc('4a0769c7', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202403040126', pop),
-    bqsrc('c2a2b724', 'CMG_YALE_DS_MC_20221026_ANV5_202409302315'),
-    bqsrc('f961f617', 'CMG_YALE_DS_RARED_20221020_ANV5_202409251714'),
-    bqsrc('5d222190', 'CMG_Yale_DS_BPEAKD_20240113_ANV5_202410011754'),
-    bqsrc('fe056168', 'CMG_Yale_DS_RD_20240113_ANV5_202410011804'),
-    bqsrc('06182245', 'CMG_Yale_DS_THAL_IRB_20240113_ANV5_202410011814'),
-    bqsrc('ad307392', 'CMG_Yale_GRU_20221020_ANV5_202402281628', pop),
-    bqsrc('35779fe0', 'CMG_Yale_HMB_20221020_ANV5_202410011825'),
-    bqsrc('cebe6de0', 'CMG_Yale_HMB_GSO_20221020_ANV5_202410011834'),
-    bqsrc('5c7f0d2a', 'CMG_Yale_HMB_IRB_20240113_ANV5_202410011846'),
-    bqsrc('abdbf318', 'CMH_GAFK_10X_Genomics_20240304_ANV5_202409251809'),
-    bqsrc('a3097787', 'CMH_GAFK_ES_20240301_ANV5_202409251815'),
-    bqsrc('e4a5f270', 'CMH_GAFK_GS_linked_read_20221107_ANV5_202409251830'),
-    bqsrc('0eaa72dc', 'CMH_GAFK_GS_long_read_20240301_ANV5_202409251840'),
-    bqsrc('54e0207f', 'CMH_GAFK_IlluminaGSA_20240311_ANV5_202409231642'),
-    bqsrc('b69c3ccd', 'CMH_GAFK_IsoSeq_20240113_ANV5_202409251851'),
-    bqsrc('aa13412e', 'CMH_GAFK_MGI_20240304_ANV5_202409251952'),
-    bqsrc('61c255d4', 'CMH_GAFK_PacBio_methyl_tagged_20240311_ANV5_202409231650'),
-    bqsrc('e6bc59ce', 'CMH_GAFK_WGS_20240113_ANV5_202409252004'),
-    bqsrc('2b8418a9', 'CSER_CHARM_GRU_20240301_ANV5_202410021502'),
-    bqsrc('3c4e3e42', 'CSER_NYCKIDSEQ_GRU_20240113_ANV5_202409252031'),
-    bqsrc('bfb01f90', 'CSER_NYCKIDSEQ_HMB_20240113_ANV5_202409252040'),
-    bqsrc('9fb7b90a', 'CSER_P3EGS_GRU_20230727_ANV5_202409252049'),
-    bqsrc('0e626b88', 'CSER_SouthSeq_GRU_20221208_ANV5_202410021513'),
-    bqsrc('c60190e0', 'GREGoR_R01_GRU_20240208_ANV5_202408141711'),
-    bqsrc('9a665ca1', 'GREGoR_R01_HMB_20240208_ANV5_202408141715'),
-    bqsrc('14f8d940', 'GTEx_BCM_GRU_CoRSIVs_20240116_ANV5_202409252058'),
-    bqsrc('44ba7ece', 'GTEx_V8_hg38_20240116_ANV5_202409251632'),
-    bqsrc('5fd8c286', 'GTEx_public_data_20240117_ANV5_202409252106'),
-    bqsrc('c1e66f15', 'HPRC_20240401_ANV5_202409251654'),
-    bqsrc('5dff1da7', 'NIA_CARD_Coriell_Cell_Lines_Open_20230727_ANV5_202410021438'),
-    bqsrc('65212bf5', 'NIA_CARD_LR_WGS_NABEC_GRU_20230727_ANV5_202410021449'),
-    bqsrc('9ffeaa82', 'NIMH_Broad_ConvNeuro_McCarroll_Nehme_Levy_CIRM_DS_Village_20240405_ANV5_202409201403'),
-    bqsrc('7f246585', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20240206_ANV5_202409252122'),
-    bqsrc('62c7b77b', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_Finkel_SMA_DS_WGS_20230109_ANV5_202409252132'),
-    bqsrc('0050666b', 'NIMH_Broad_WGSPD1_McCarroll_COGS_DS_WGS_20240113_ANV5_202409252139'),
-    bqsrc('591f1c1e', 'NIMH_Broad_WGSPD1_McCarroll_Escamilla_DS_WGS_20240112_ANV5_202410011855'),
-    bqsrc('59c59a28', 'NIMH_Broad_WGSPD1_McCarroll_Light_DS_WGS_20240625_ANV5_202409252147'),
-    bqsrc('754928da', 'NIMH_Broad_WGSPD1_McCarroll_Pato_GRU_WGS_20240112_ANV5_202410011904'),
-    bqsrc('cce0dbdc', 'NIMH_Broad_WGSPD_1_McCarroll_Braff_DS_WGS_20240304_ANV5_202409252156'),
-    bqsrc('2bf3361f', 'NIMH_CIRM_FCDI_ConvergentNeuro_McCarroll_Eggan_GRU_Arrays_20230109_ANV5_202409252204'),
-    bqsrc('95f60999', 'T2T_20230714_ANV5_202409252214'),
-    bqsrc('a20ffbf1', 'T2T_CHRY_20240301_ANV5_202409252300'),
-    bqsrc('c4be3462', 'ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome_20221102_ANV5_202409241400'),
-    bqsrc('842d0cc0', 'ccdg_asc_ndd_daly_talkowski_IBIS_asd_exome_20221024_ANV5_202409241411'),
-    bqsrc('40c6c06f', 'ccdg_asc_ndd_daly_talkowski_TASC_asd_exome_20221117_ANV5_202409241420'),
-    bqsrc('56136832', 'ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome_20231013_ANV5_202409241432'),
-    bqsrc('dd505610', 'ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome_20221108_ANV5_202409241442'),
-    bqsrc('b2e1bb0d', 'ccdg_asc_ndd_daly_talkowski_brusco_asd_exome_20230327_ANV5_202409241451'),
-    bqsrc('5382a45d', 'ccdg_asc_ndd_daly_talkowski_chung_asd_exome_20221107_ANV5_202409241501'),
-    bqsrc('ffb6d106', 'ccdg_asc_ndd_daly_talkowski_control_NIMH_asd_exome_20221201_ANV5_202409241511'),
-    bqsrc('0de6bd0f', 'ccdg_asc_ndd_daly_talkowski_gargus_asd_exome_20231013_ANV5_202409241521'),
-    bqsrc('eced0cb1', 'ccdg_asc_ndd_daly_talkowski_goethe_asd_exome_20221107_ANV5_202409241530'),
-    bqsrc('849f26ad', 'ccdg_asc_ndd_daly_talkowski_gurrieri_asd_exome_20221024_ANV5_202409252359'),
-    bqsrc('ef7e53e2', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_exome_20221107_ANV5_202409241541'),
-    bqsrc('4155fb6c', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_wgs_20221107_ANV5_202409241551'),
-    bqsrc('7ef162bf', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_exome_20221108_ANV5_202409241559'),
-    bqsrc('0aba3c39', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_wgs_20221109_ANV5_202409241611'),
-    bqsrc('49fb2096', 'ccdg_asc_ndd_daly_talkowski_mayo_asd_exome_20221024_ANV5_202409252350'),
-    bqsrc('ac3a764d', 'ccdg_asc_ndd_daly_talkowski_mcpartland_asd_exome_20221116_ANV5_202409241620'),
-    bqsrc('3829964a', 'ccdg_asc_ndd_daly_talkowski_menashe_asd_exome_20221108_ANV5_202409241630'),
-    bqsrc('903678e2', 'ccdg_asc_ndd_daly_talkowski_parellada_asd_exome_20221108_ANV5_202409241638'),
-    bqsrc('9d656629', 'ccdg_asc_ndd_daly_talkowski_passos_bueno_asd_exome_20221108_ANV5_202409241649'),
-    bqsrc('faa3347c', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_exome__20221025_ANV5_202409241658'),
-    bqsrc('2ea97771', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_wgs_20221027_ANV5_202409241814'),
-    bqsrc('88aae06e', 'ccdg_asc_ndd_daly_talkowski_persico_asd_exome_20221027_ANV5_202409241823'),
-    bqsrc('08215fa6', 'ccdg_asc_ndd_daly_talkowski_renieri_asd_exome_20230327_ANV5_202409241833'),
-    bqsrc('6f17c190', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_exome_20230324_ANV5_202409241842'),
-    bqsrc('333a3617', 'ccdg_asc_ndd_daly_talkowski_weiss_asd_exome_20221108_ANV5_202409241851'),
-    bqsrc('1e362e1d', 'ccdg_broad_ai_ibd_daly_brant_burnstein_utsw_wes_20240627_ANV5_202409260008'),
-    bqsrc('8918d261', 'ccdg_broad_ai_ibd_daly_brant_niddk_gsa_20240103_ANV5_202409260018'),
-    bqsrc('f5fdd89a', 'ccdg_broad_ai_ibd_daly_burnstein_gsa_20240103_ANV5_202409261428'),
-    bqsrc('91d1ab8e', 'ccdg_broad_ai_ibd_daly_chen_gsa_20240103_ANV5_202409261437'),
-    bqsrc('acab4546', 'ccdg_broad_ai_ibd_daly_chen_wes_20240103_ANV5_202409241859'),
-    bqsrc('45c2ba3b', 'ccdg_broad_ai_ibd_daly_cho_niddk_gsa_20240103_ANV5_202409261446'),
-    bqsrc('185d52bd', 'ccdg_broad_ai_ibd_daly_chung_gider_gsa_20240103_ANV5_202409261456'),
-    bqsrc('01e1177c', 'ccdg_broad_ai_ibd_daly_chung_gider_wes_20240103_ANV5_202409241909'),
-    bqsrc('94a46beb', 'ccdg_broad_ai_ibd_daly_faubion_share_gsa_20240104_ANV5_202409261505'),
-    bqsrc('b2456308', 'ccdg_broad_ai_ibd_daly_faubion_share_wes_20240104_ANV5_202409241919'),
-    bqsrc('dde3655d', 'ccdg_broad_ai_ibd_daly_franchimont_gsa_20240104_ANV5_202409261515'),
-    bqsrc('73a081bb', 'ccdg_broad_ai_ibd_daly_franchimont_wes_20240104_ANV5_202409241929'),
-    bqsrc('f4d731a0', 'ccdg_broad_ai_ibd_daly_hyams_protect_gsa_20240311_ANV5_202409231656'),
-    bqsrc('21868172', 'ccdg_broad_ai_ibd_daly_hyams_protect_wes_20240104_ANV5_202409241940'),
-    bqsrc('a6a40cd9', 'ccdg_broad_ai_ibd_daly_kastner_fmf_gsa_20240104_ANV5_202409261525'),
-    bqsrc('4b41d063', 'ccdg_broad_ai_ibd_daly_kastner_fmf_nhgri_wes_20240104_ANV5_202409261534'),
-    bqsrc('59084d62', 'ccdg_broad_ai_ibd_daly_kupcinskas_gsa_20240311_ANV5_202409231708'),
-    bqsrc('e56d71fd', 'ccdg_broad_ai_ibd_daly_kupcinskas_wes_20240104_ANV5_202409241949'),
-    bqsrc('98f1acc9', 'ccdg_broad_ai_ibd_daly_lewis_sparc_gsa_20240104_ANV5_202409261543'),
-    bqsrc('5b2de91f', 'ccdg_broad_ai_ibd_daly_lira_share_wes_20240104_ANV5_202409242001'),
-    bqsrc('144a86c1', 'ccdg_broad_ai_ibd_daly_louis_gsa_20240311_ANV5_202409231721'),
-    bqsrc('5dd9d83b', 'ccdg_broad_ai_ibd_daly_louis_wes_20240104_ANV5_202409242011'),
-    bqsrc('f11a2ad0', 'ccdg_broad_ai_ibd_daly_mccauley_wes_20240104_ANV5_202409242021'),
-    bqsrc('2b74b327', 'ccdg_broad_ai_ibd_daly_mcgovern_niddk_wes_20240104_ANV5_202409242032'),
-    bqsrc('91cac0e9', 'ccdg_broad_ai_ibd_daly_moayyedi_imagine_gsa_20240105_ANV5_202409261605'),
-    bqsrc('1cde4183', 'ccdg_broad_ai_ibd_daly_newberry_share_gsa_20240105_ANV5_202409261616'),
-    bqsrc('7a369c1e', 'ccdg_broad_ai_ibd_daly_newberry_share_wes_20240105_ANV5_202409242046'),
-    bqsrc('db6e29bb', 'ccdg_broad_ai_ibd_daly_niddk_cho_wes_20240105_ANV5_202409242056'),
-    bqsrc('820a5c30', 'ccdg_broad_ai_ibd_daly_pekow_share_gsa_20240105_ANV5_202409261627'),
-    bqsrc('69911b0d', 'ccdg_broad_ai_ibd_daly_pekow_share_wes_20240105_ANV5_202409242107'),
-    bqsrc('77ec2fe1', 'ccdg_broad_ai_ibd_daly_rioux_bitton_igenomed_wes_20240105_ANV5_202409261636'),
-    bqsrc('08a64b4f', 'ccdg_broad_ai_ibd_daly_rioux_genizon_wes_20240311_ANV5_202409231732'),
-    bqsrc('13a5c9e0', 'ccdg_broad_ai_ibd_daly_rioux_igenomed_gsa_20240105_ANV5_202409261645'),
-    bqsrc('d8202699', 'ccdg_broad_ai_ibd_daly_rioux_niddk_gsa_20240108_ANV5_202409261653'),
-    bqsrc('67df5d1d', 'ccdg_broad_ai_ibd_daly_rioux_niddk_wes_20240108_ANV5_202409242116'),
-    bqsrc('f0ead8f9', 'ccdg_broad_ai_ibd_daly_sands_msccr_gsa_20240108_ANV5_202409261705'),
-    bqsrc('f890c249', 'ccdg_broad_ai_ibd_daly_sands_msccr_wes_20240108_ANV5_202409242127'),
-    bqsrc('065c8f18', 'ccdg_broad_ai_ibd_daly_silverberg_niddk_gsa_20240108_ANV5_202409261715'),
-    bqsrc('401b1cf7', 'ccdg_broad_ai_ibd_daly_stampfer_nhs_gsa_20240311_ANV5_202409231743'),
-    bqsrc('034aecb5', 'ccdg_broad_ai_ibd_daly_stampfer_wes_20240108_ANV5_202409251505'),
-    bqsrc('f3933ea2', 'ccdg_broad_ai_ibd_daly_vermeire_gsa_20240113_ANV5_202409261726'),
-    bqsrc('dbda69e3', 'ccdg_broad_ai_ibd_daly_vermeire_wes_20240108_ANV5_202409251516'),
-    bqsrc('d1d3e261', 'ccdg_broad_ai_ibd_daly_xavier_prism_gsa_20240108_ANV5_202409261740'),
-    bqsrc('1918c027', 'ccdg_broad_ai_ibd_daly_xavier_prism_wes_20240108_ANV5_202409251531'),
-    bqsrc('900597b7', 'ccdg_broad_ai_ibd_daly_xavier_share_gsa_20240108_ANV5_202409261751'),
-    bqsrc('e9e9f233', 'ccdg_broad_ai_ibd_daly_xavier_share_wes_20240108_ANV5_202409251548'),
-    bqsrc('851fd8f7', 'ccdg_broad_ai_ibd_niddk_daly_duerr_wes_20240112_ANV5_202409251558'),
-    bqsrc('13a30243', 'ccdg_broad_ai_ibd_niddk_daly_silverberg_wes_20240112_ANV5_202409251610'),
-    bqsrc('eceddedc', 'ccdg_broad_daly_igsr_1kg_twist_wes_20240625_ANV5_202409231828'),
-    bqsrc('96417715', 'ccdg_broad_mi_univutah_ds_cvd_wes_20221026_ANV5_202409231943'),
-    bqsrc('ff012258', 'ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes_20221101_ANV5_202409302105'),
-    bqsrc('61b6b42b', 'ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes_20221026_ANV5_202409302116'),
-    bqsrc('c27e3cda', 'ccdg_nygc_np_autism_hmca_wgs_20221024_ANV5_202409302135'),
-    bqsrc('f4073027', 'ccdg_nygc_np_autism_tasc_wgs_20221024_ANV5_202409302207'),
-    bqsrc('6f9855f6', 'ccdg_washu_ai_t1d_t1dgc_wgs_20221031_ANV5_202409302216'),
-    bqsrc('95a5e448', 'ccdg_washu_cvd_eocad_cleveland_wgs_20221024_ANV5_202409302226'),
-    bqsrc('9faffbb3', 'ccdg_washu_cvd_eocad_emerge_wgs_20221024_ANV5_202409302235'),
-    bqsrc('5d6d4dc4', 'ccdg_washu_cvd_eocad_emory_wgs_20221024_ANV5_202409302245'),
-    bqsrc('db95c1cc', 'nhgri_broad_ibd_daly_winter_wes_20240112_ANV5_202409251622'),
-]))
-
-anvil9_sources = mkdict(anvil8_sources, 280, mkdelta([
-    bqsrc('b555b2f5', '1000G_PRIMED_data_model_20240410_ANV5_202502211647'),
-    bqsrc('265cde27', 'ALSCompute_Collection_GRU_20231016_ANV5_202410310107'),
-    bqsrc('853bbf15', 'ALSCompute_Collection_HMB_20241018_ANV5_202410232001'),
-    bqsrc('5216abda', 'CCDG_Broad_AI_IBD_McCauley_WGS_20240114_ANV5_202502052138'),
-    bqsrc('078d7ad8', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_Arrays_20250206_ANV5_202502201726'),
-    bqsrc('ea743ee1', 'CCDG_Broad_CVD_AF_Figtree_BioHeart_HMB_WES_20250206_ANV5_202502201731'),
-    bqsrc('088ebd14', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_Arrays_20250206_ANV5_202502201736'),
-    bqsrc('147aff13', 'CCDG_Broad_CVD_AF_GAPP_DS_MDS_WES_20250206_ANV5_202502201740'),
-    bqsrc('897ad869', 'CCDG_Broad_CVD_AF_Marcus_UCSF_Arrays_20250206_ANV5_202502201745'),
-    bqsrc('ac2033df', 'CCDG_Broad_CVD_AF_Marcus_UCSF_HMB_WES_20250206_ANV5_202502201749'),
-    bqsrc('190647f7', 'CCDG_Broad_CVD_AF_VAFAR_Arrays_20250219_ANV5_202502201753'),
-    bqsrc('9694bb1c', 'CCDG_Broad_MI_ATVB_DS_CVD_WES_20250206_ANV5_202502201757'),
-    bqsrc('6a7bfee9', 'CCDG_Broad_MI_BRAVE_GRU_WES_20250206_ANV5_202502201801'),
-    bqsrc('12048ad1', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20230128_ANV5_202502201806'),
-    bqsrc('f101bba7', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20230621_ANV5_202502201810'),
-    bqsrc('1f264a75', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20230128_ANV5_202502201814'),
-    bqsrc('6d9ad64a', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20230314_ANV5_202502201818'),
-    bqsrc('724f8958', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20240201_ANV5_202502201822'),
-    bqsrc('1d96b10b', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20240201_ANV5_202502201826'),
-    bqsrc('aa7f9c50', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20240201_ANV5_202502201830'),
-    bqsrc('aef3d233', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20240201_ANV5_202502201834'),
-    bqsrc('95c60b51', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20230126_ANV5_202502201839'),
-    bqsrc('b5486758', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20230131_ANV5_202502201843'),
-    bqsrc('0ef2f4b9', 'CMG_UWASH_DS_BAV_IRB_PUB_RD_20250206_ANV5_202502201846'),
-    bqsrc('e85fc320', 'CMG_UWASH_DS_BDIS_20250206_ANV5_202502201850'),
-    bqsrc('a5e0fb2a', 'CMG_UWASH_DS_HFA_20250206_ANV5_202502201859'),
-    bqsrc('28813dc5', 'CMG_UWASH_DS_NBIA_20250206_ANV5_202502201903'),
-    bqsrc('418e6f5b', 'CMG_UWASH_HMB_20250219_ANV5_202502201916'),
-    bqsrc('11e44295', 'CMG_UWASH_HMB_IRB_20250219_ANV5_202502201921'),
-    bqsrc('2c303369', 'CMG_UWash_DS_CHDEF_20250224_ANV5_202502241753'),
-    bqsrc('0e0af0a8', 'CMG_UWash_DS_EP_20250219_ANV5_202502201854'),
-    bqsrc('766c47dd', 'CMG_UWash_GRU_20250224_ANV5_202502241706'),
-    bqsrc('aa22e87e', 'CMG_UWash_GRU_IRB_20250224_ANV5_202502241723'),
-    bqsrc('7c44dbc8', 'CMH_GAFK_ES_20240301_ANV5_202502201925'),
-    bqsrc('97f2fa00', 'CMH_GAFK_GS_long_read_20240301_ANV5_202502201932'),
-    bqsrc('456a8996', 'CMH_GAFK_IlluminaGSA_20240311_ANV5_202502201937'),
-    bqsrc('9263e232', 'CMH_GAFK_IsoSeq_20240113_ANV5_202502201941'),
-    bqsrc('ef718b6b', 'CMH_GAFK_PacBio_methyl_tagged_20240311_ANV5_202502201945'),
-    bqsrc('de339830', 'CMH_GAFK_WGS_20240113_ANV5_202502201948'),
-    bqsrc('706dd75a', 'DepMap_HMB_20240827_ANV5_202410240027'),
-    bqsrc('cab8b4b3', 'DepMap_HMB_R2_20250224_ANV5_202502241800'),
-    bqsrc('d4765cad', 'AnVIL_ENCORE_RS293_20250304_ANV5_202503042020'),
-    bqsrc('e944e571', 'GREGoR_R01_GRU_20240208_ANV5_202502202158'),
-    bqsrc('24806158', 'GREGoR_R01_HMB_20240208_ANV5_202502202202'),
-    bqsrc('88e6ae93', 'GREGoR_R02_HMB_20241105_ANV5_202502202131'),
-    bqsrc('46fcac4d', 'MAS_ISO_seq_20240113_ANV5_202409261333'),
-    bqsrc('2e8d7a0e', 'NIMH_Broad_ConvergentNeuro_McCarroll_Eggan_CIRM_GRU_WGS_20240206_ANV5_202502202026'),
-    bqsrc('14a38418', 'NIMH_Broad_WGSPD1_McCarroll_COGS_DS_WGS_20240113_ANV5_202502202034'),
-    bqsrc('91ecebc5', 'NIMH_Broad_WGSPD_1_McCarroll_Braff_DS_WGS_20240304_ANV5_202502202030'),
-]))
-
-anvil10_sources = mkdict(anvil9_sources, 283, mkdelta([
-    bqsrc('6db4e098', 'CCDG_Baylor_CVD_ARIC_20231008_ANV5_202503171456'),
-    bqsrc('14967a4d', 'CCDG_WashU_CVD_EOCAD_WashU_CAD_GRU_IRB_WGS_20230525_ANV5_202503171543'),
-    bqsrc('2c000b04', 'FetalGenomics_PrenatalSEQ_20250520_ANV5_202505201718'),
-    bqsrc('8ae2d6e6', 'ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome_20250514_ANV5_202505191331'),
-    bqsrc('1841de51', 'ccdg_asc_ndd_daly_talkowski_IBIS_asd_exome_20250514_ANV5_202505191529'),
-    bqsrc('9a9bd879', 'ccdg_asc_ndd_daly_talkowski_TASC_asd_exome_20250515_ANV5_202505191729'),
-    bqsrc('bf0b5c71', 'ccdg_asc_ndd_daly_talkowski_ac_boston_asd_exome_20250507_ANV5_202505080038'),
-    bqsrc('de55c8ca', 'ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome_20250514_ANV5_202505191339'),
-    bqsrc('05f81c53', 'ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome_20250514_ANV5_202505191347'),
-    bqsrc('ec751e53', 'ccdg_asc_ndd_daly_talkowski_brusco_asd_exome_20250514_ANV5_202505191354'),
-    bqsrc('e880eb24', 'ccdg_asc_ndd_daly_talkowski_chung_asd_exome_20250514_ANV5_202505191404'),
-    bqsrc('14218b2f', 'ccdg_asc_ndd_daly_talkowski_control_NIMH_asd_exome_20250514_ANV5_202505191412'),
-    bqsrc('44dd3f7f', 'ccdg_asc_ndd_daly_talkowski_domenici_asd_exome_20250514_ANV5_202505191423'),
-    bqsrc('b497e3bd', 'ccdg_asc_ndd_daly_talkowski_gargus_asd_exome_20250514_ANV5_202505191431'),
-    bqsrc('23e0cf66', 'ccdg_asc_ndd_daly_talkowski_goethe_asd_exome_20250514_ANV5_202505191439'),
-    bqsrc('dae963c2', 'ccdg_asc_ndd_daly_talkowski_gurrieri_asd_exome_20250514_ANV5_202505191448'),
-    bqsrc('02a4023f', 'ccdg_asc_ndd_daly_talkowski_herman_asd_exome_20250514_ANV5_202505191456'),
-    bqsrc('562d7351', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_exome_20250514_ANV5_202505191503'),
-    bqsrc('90758277', 'ccdg_asc_ndd_daly_talkowski_hertz_picciotto_asd_wgs_20250514_ANV5_202505191512'),
-    bqsrc('cd6d2aa6', 'ccdg_asc_ndd_daly_talkowski_hultman_asd_exome_20250514_ANV5_202505191519'),
-    bqsrc('d21464d2', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_exome_20250514_ANV5_202505191537'),
-    bqsrc('186b5498', 'ccdg_asc_ndd_daly_talkowski_kolevzon_asd_wgs_20250514_ANV5_202505191546'),
-    bqsrc('7fc43dc3', 'ccdg_asc_ndd_daly_talkowski_lattig_asd_exome_20250514_ANV5_202505191554'),
-    bqsrc('e4ab9e05', 'ccdg_asc_ndd_daly_talkowski_mayo_asd_exome_20250515_ANV5_202505191602'),
-    bqsrc('81d3c9c8', 'ccdg_asc_ndd_daly_talkowski_mcpartland_asd_exome_20250515_ANV5_202505191610'),
-    bqsrc('0b020eb2', 'ccdg_asc_ndd_daly_talkowski_menashe_asd_exome_20250515_ANV5_202505191620'),
-    bqsrc('f49b0d4c', 'ccdg_asc_ndd_daly_talkowski_minshew_asd_exome_20250515_ANV5_202505191632'),
-    bqsrc('d0e9181a', 'ccdg_asc_ndd_daly_talkowski_parellada_asd_exome_20250515_ANV5_202505191640'),
-    bqsrc('3965a084', 'ccdg_asc_ndd_daly_talkowski_passos_bueno_asd_exome_20250515_ANV5_202505191651'),
-    bqsrc('c9d758bb', 'ccdg_asc_ndd_daly_talkowski_pericak_vance_asd_exome__20250515_ANV5_202505191658'),
-    bqsrc('45f811c7', 'ccdg_asc_ndd_daly_talkowski_persico_asd_exome_20250515_ANV5_202505191706'),
-    bqsrc('c3a807a3', 'ccdg_asc_ndd_daly_talkowski_renieri_asd_exome_20250515_ANV5_202505191714'),
-    bqsrc('d610c1c1', 'ccdg_asc_ndd_daly_talkowski_schloesser_asd_exome_20250515_ANV5_202505191722'),
-    bqsrc('ea8f0099', 'ccdg_asc_ndd_daly_talkowski_weiss_asd_exome_20250515_ANV5_202505191738'),
-    bqsrc('f143f633', 'ccdg_broad_ai_ibd_daly_mcgovern_share_wes_20240104_ANV5_202503171541'),
-]))
-
-anvil11_sources = mkdict(anvil10_sources, 371, mkdelta([
-    bqsrc('afe52c93', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSA_MD_20250718_ANV5_202508070436'),
-    bqsrc('a0e71864', 'CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_WES_20250718_ANV5_202507300051'),
-    bqsrc('d4e6fade', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_GSA_MD_20250718_ANV5_202508051209'),
-    bqsrc('3c8c822c', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPIL_BA_MDS_WES_20250718_ANV5_202507300117'),
-    bqsrc('1c40baac', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_GSA_MD_20250718_ANV5_202508050140'),
-    bqsrc('6de194a7', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EPI_BA_ID_MDS_WES_20250718_ANV5_202507300109'),
-    bqsrc('76e40236', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_GSA_MD_20250718_ANV5_202508050131'),
-    bqsrc('d1f97905', 'CCDG_Broad_NP_Epilepsy_AUSAUS_EP_BA_CN_ID_MDS_WES_20250718_ANV5_202507300059'),
-    bqsrc('6b600036', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_IRB_WES_20250718_ANV5_202507300125'),
-    bqsrc('21abf32b', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_GSA_MD_20250718_ANV5_202508051217'),
-    bqsrc('a9edefce', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_NPU_IRB_GSA_MD_20250718_ANV5_202508051225'),
-    bqsrc('1a3c4c17', 'CCDG_Broad_NP_Epilepsy_AUSRMB_DS_EAED_MDS_WES_20250718_ANV5_202507311756'),
-    bqsrc('f297f460', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_ADLT_WES_20250718_ANV5_202507300140'),
-    bqsrc('d570baa0', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_MDS_NPU_GSA_MD_20250718_ANV5_202508051233'),
-    bqsrc('e459e688', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_NPU_ADLT_GSA_MD_20250718_ANV5_202508051240'),
-    bqsrc('4497c1cf', 'CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_WES_20250718_ANV5_202507300148'),
-    bqsrc('661aa0d4', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_GSA_MD_20250718_ANV5_202508051249'),
-    bqsrc('1432eb38', 'CCDG_Broad_NP_Epilepsy_BELATW_GRU_WES_20250718_ANV5_202507300156'),
-    bqsrc('41cb412f', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_GSA_MD_20250718_ANV5_202508051256'),
-    bqsrc('c5c63622', 'CCDG_Broad_NP_Epilepsy_BELULB_DS_EP_NPU_WES_20250718_ANV5_202507311805'),
-    bqsrc('0374d242', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_MDS_NPU_GSA_MD_20250718_ANV5_202508051303'),
-    bqsrc('03d1e104', 'CCDG_Broad_NP_Epilepsy_BRAUSP_DS_WES_20250718_ANV5_202507311814'),
-    bqsrc('2abd204e', 'CCDG_Broad_NP_Epilepsy_CANCAL_GRU_v2_WES_20250718_ANV5_202507300218'),
-    bqsrc('4427fad3', 'CCDG_Broad_NP_Epilepsy_CANCAL_GSA_MD_20250718_ANV5_202508051311'),
-    bqsrc('bde9ccdd', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_GSA_MD_20250718_ANV5_202508051319'),
-    bqsrc('c01aff87', 'CCDG_Broad_NP_Epilepsy_CANUTN_DS_EP_WES_20250718_ANV5_202507300230'),
-    bqsrc('32f47a85', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_GSA_MD_20250718_ANV5_202508051327'),
-    bqsrc('38c9f789', 'CCDG_Broad_NP_Epilepsy_CHEUBB_HMB_IRB_MDS_WES_20250718_ANV5_202507300237'),
-    bqsrc('11d221ae', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_GSA_MD_20250718_ANV5_202508051334'),
-    bqsrc('1e8df3c0', 'CCDG_Broad_NP_Epilepsy_CYPCYP_HMB_NPU_MDS_WES_20250718_ANV5_202507300245'),
-    bqsrc('605ffd28', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_GSA_MD_20250721_ANV5_202508051342'),
-    bqsrc('69b2412d', 'CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_WES_20250721_ANV5_202507300252'),
-    bqsrc('c209fbb5', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_GSA_MD_20250721_ANV5_202508051355'),
-    bqsrc('521fffef', 'CCDG_Broad_NP_Epilepsy_DEUPUM_HMB_MDS_WES_20250721_ANV5_202507300259'),
-    bqsrc('89f25203', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_GSA_MD_20250721_ANV5_202508051403'),
-    bqsrc('3efe17c5', 'CCDG_Broad_NP_Epilepsy_DEUUGS_DS_EP_MDS_WES_20250721_ANV5_202507300307'),
-    bqsrc('1a1d321e', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051412'),
-    bqsrc('50e7f491', 'CCDG_Broad_NP_Epilepsy_DEUUKB_HMB_NPU_MDS_WES_20250721_ANV5_202507311823'),
-    bqsrc('d6d892c9', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_GSA_MD_20250721_ANV5_202508051421'),
-    bqsrc('8ea29289', 'CCDG_Broad_NP_Epilepsy_DEUUKL_HMB_WES_20250721_ANV5_202507300323'),
-    bqsrc('51b097e1', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_GSA_MD_20250721_ANV5_202508051429'),
-    bqsrc('9b1b6c81', 'CCDG_Broad_NP_Epilepsy_DEUULG_GRU_WES_20250721_ANV5_202507300331'),
-    bqsrc('bfd88a7f', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051437'),
-    bqsrc('eadf2c20', 'CCDG_Broad_NP_Epilepsy_DEUUTB_HMB_NPU_MDS_WES_20250721_ANV5_202507311835'),
-    bqsrc('fdd88715', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_CO_MORBIDI_MDS_WES_20250721_ANV5_202507300347'),
-    bqsrc('c040bb51', 'CCDG_Broad_NP_Epilepsy_FINKPH_EPIL_MDS_GSA_MD_20250721_ANV5_202508051446'),
-    bqsrc('d8d5745a', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051454'),
-    bqsrc('4bb9bad6', 'CCDG_Broad_NP_Epilepsy_FINUVH_HMB_NPU_MDS_WES_20250721_ANV5_202507300355'),
-    bqsrc('7205017c', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_GSA_MD_20250721_ANV5_202508051503'),
-    bqsrc('a1074acf', 'CCDG_Broad_NP_Epilepsy_FRALYU_HMB_WES_20250721_ANV5_202507311844'),
-    bqsrc('63a1cdbc', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_GSA_MD_20250721_ANV5_202508051511'),
-    bqsrc('67ab4fc2', 'CCDG_Broad_NP_Epilepsy_GBRSWU_CARDI_NEURO_WES_20250721_ANV5_202507300410'),
-    bqsrc('59478363', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_GSA_MD_20250721_ANV5_202508051519'),
-    bqsrc('28c93500', 'CCDG_Broad_NP_Epilepsy_GBRUCL_DS_EARET_MDS_WES_20250721_ANV5_202507300422'),
-    bqsrc('508e8da9', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_GSA_MD_20250721_ANV5_202508051527'),
-    bqsrc('dcf91ba6', 'CCDG_Broad_NP_Epilepsy_GBRUNL_EP_ETIOLOGY_MDS_WES_20250721_ANV5_202507300429'),
-    bqsrc('3adff934', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_GSA_MD_20250721_ANV5_202508051535'),
-    bqsrc('c2dc4f2d', 'CCDG_Broad_NP_Epilepsy_GBRUNL_GRU_WES_20250721_ANV5_202507300438'),
-    bqsrc('a8eb2929', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_GSA_MD_20250721_ANV5_202508051543'),
-    bqsrc('0da705f6', 'CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_WES_20250721_ANV5_202507300445'),
-    bqsrc('7e5aa87b', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_GSA_MD_20250721_ANV5_202508051551'),
-    bqsrc('df3c4c25', 'CCDG_Broad_NP_Epilepsy_HKGHKK_HMB_MDS_WES_20250721_ANV5_202507311853'),
-    bqsrc('be423fad', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_GSA_MD_20250721_ANV5_202508051559'),
-    bqsrc('76b648a7', 'CCDG_Broad_NP_Epilepsy_HKOSB_GRU_WES_20250721_ANV5_202507300459'),
-    bqsrc('2e47cb4a', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_GSA_MD_20250721_ANV5_202508051608'),
-    bqsrc('b92ea668', 'CCDG_Broad_NP_Epilepsy_HRVUZG_HMB_MDS_WES_20250721_ANV5_202507300507'),
-    bqsrc('fb086869', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_GSA_MD_20250721_ANV5_202508051615'),
-    bqsrc('ef987506', 'CCDG_Broad_NP_Epilepsy_IRLRCI_GRU_IRB_WES_20250721_ANV5_202507300514'),
-    bqsrc('f695ec55', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051624'),
-    bqsrc('cbf89a9e', 'CCDG_Broad_NP_Epilepsy_ITAICB_HMB_NPU_MDS_WES_20250721_ANV5_202507300522'),
-    bqsrc('3383c514', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_GSA_MD_20250721_ANV5_202508051633'),
-    bqsrc('1e2905c4', 'CCDG_Broad_NP_Epilepsy_ITAIGI_GRU_WES_20250721_ANV5_202507311902'),
-    bqsrc('a49a2f24', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_GSA_MD_20250721_ANV5_202508051641'),
-    bqsrc('cf8affe8', 'CCDG_Broad_NP_Epilepsy_ITAUBG_DS_EPI_NPU_MDS_WES_20250721_ANV5_202507311912'),
-    bqsrc('2c90922a', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_GSA_MD_20250721_ANV5_202508051651'),
-    bqsrc('a8ea4518', 'CCDG_Broad_NP_Epilepsy_ITAUMC_DS_NEURO_MDS_WES_20250721_ANV5_202507300545'),
-    bqsrc('e9212e1f', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_GSA_MD_20250721_ANV5_202508051658'),
-    bqsrc('14cc9aca', 'CCDG_Broad_NP_Epilepsy_ITAUMR_GRU_NPU_WES_20250721_ANV5_202507300553'),
-    bqsrc('2c6d0fc7', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_GSA_MD_20250721_ANV5_202508051707'),
-    bqsrc('5ec41204', 'CCDG_Broad_NP_Epilepsy_JPNFKA_GRU_WES_20250721_ANV5_202507311923'),
-    bqsrc('42043ca7', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_GSA_MD_20250721_ANV5_202508051715'),
-    bqsrc('8696ea48', 'CCDG_Broad_NP_Epilepsy_JPNRKI_DS_NPD_IRB_NPU_WES_20250721_ANV5_202507300608'),
-    bqsrc('c1c7f997', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_GSA_MD_20250721_ANV5_202508051724'),
-    bqsrc('55db8a23', 'CCDG_Broad_NP_Epilepsy_KENKIL_GRU_WES_20250721_ANV5_202507311932'),
-    bqsrc('6292d318', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_GSA_MD_20250721_ANV5_202508051734'),
-    bqsrc('480ae148', 'CCDG_Broad_NP_Epilepsy_LEBABM_DS_Epilepsy_WES_20250721_ANV5_202507311941'),
-    bqsrc('3ef65a17', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_GSA_MD_20250721_ANV5_202508051741'),
-    bqsrc('9d825c25', 'CCDG_Broad_NP_Epilepsy_LEBABM_GRU_WES_20250721_ANV5_202507311950'),
-    bqsrc('c2551282',
'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508051749'), - bqsrc('b623f957', 'CCDG_Broad_NP_Epilepsy_LTUUHK_HMB_NPU_MDS_WES_20250721_ANV5_202507300637'), - bqsrc('d2fe229f', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_GSA_MD_20250721_ANV5_202508051757'), - bqsrc('eb2ef800', 'CCDG_Broad_NP_Epilepsy_NZLUTO_EPIL_BC_ID_MDS_WES_20250721_ANV5_202507300644'), - bqsrc('e78c32ac', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_GSA_MD_20250721_ANV5_202508051804'), - bqsrc('b6e37b1f', 'CCDG_Broad_NP_Epilepsy_TURBZU_GRU_WES_20250721_ANV5_202507300652'), - bqsrc('a5c4baea', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_GSA_MD_20250721_ANV5_202508051813'), - bqsrc('fc9369c5', 'CCDG_Broad_NP_Epilepsy_TURIBU_DS_NEURO_AD_NPU_WES_20250721_ANV5_202507300659'), - bqsrc('2e229142', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_GSA_MD_20250721_ANV5_202508051820'), - bqsrc('be41731b', 'CCDG_Broad_NP_Epilepsy_TWNCGM_HMB_NPU_AdultsONLY_WES_20250721_ANV5_202507300707'), - bqsrc('978ba5d4', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_GSA_MD_20250721_ANV5_202508051912'), - bqsrc('75f1ba4f', 'CCDG_Broad_NP_Epilepsy_USABCH_EPI_MUL_CON_MDS_WES_20250721_ANV5_202507300719'), - bqsrc('1102e29c', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_GSA_MD_20250721_ANV5_202508051920'), - bqsrc('906814a8', 'CCDG_Broad_NP_Epilepsy_USABLC_GRU_NPU_WES_20250721_ANV5_202507300726'), - bqsrc('d0448b56', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_GSA_MD_20250721_ANV5_202508051928'), - bqsrc('d76a4f76', 'CCDG_Broad_NP_Epilepsy_USACCF_HMB_MDS_WES_20250721_ANV5_202507311958'), - bqsrc('2950a024', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_GSA_MD_20250721_ANV5_202508051936'), - bqsrc('4cc169a9', 'CCDG_Broad_NP_Epilepsy_USACCH_DS_NEURO_MDS_WES_20250721_ANV5_202507300740'), - bqsrc('3a75c858', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_GSA_MD_20250721_ANV5_202508051945'), - bqsrc('6e53560b', 'CCDG_Broad_NP_Epilepsy_USACHP_GRU_WES_20250721_ANV5_202507312007'), - bqsrc('32238959', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_GSA_MD_20250721_ANV5_202508051955'), - bqsrc('a0516528', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_EP_MDS_WES_20250721_ANV5_202507300756'), - bqsrc('8a490e9e', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_GSA_MD_20250721_ANV5_202508052004'), - bqsrc('a894d673', 'CCDG_Broad_NP_Epilepsy_USACRW_DS_SEIZD_WES_20250721_ANV5_202507300804'), - bqsrc('8091a8f2', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_GSA_MD_20250721_ANV5_202508052013'), - bqsrc('0ba57db1', 'CCDG_Broad_NP_Epilepsy_USACRW_EPI_ASZ_MED_MDS_WES_20250721_ANV5_202507300810'), - bqsrc('e1a82cc1', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_GSA_MD_20250721_ANV5_202508052020'), - bqsrc('98461d37', 'CCDG_Broad_NP_Epilepsy_USAEGP_GRU_WES_20250721_ANV5_202507300817'), - bqsrc('1490968b', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_GSA_MD_20250721_ANV5_202508052028'), - bqsrc('96cd36de', 'CCDG_Broad_NP_Epilepsy_USAFEB_GRU_WES_20250721_ANV5_202507300824'), - bqsrc('189ec05e', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_GSA_MD_20250721_ANV5_202508052035'), - bqsrc('32793aae', 'CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_20250721_ANV5_202507300831'), - bqsrc('f344bff6', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_MDS_GSA_MD_20250721_ANV5_202508052042'), - bqsrc('21a15106', 'CCDG_Broad_NP_Epilepsy_USALCH_HMB_WES_20250721_ANV5_202507300838'), - bqsrc('1d2b5391', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_GSA_MD_20250721_ANV5_202508052050'), - bqsrc('d3c243dc', 'CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_WES_20250721_ANV5_202507300845'), - bqsrc('65dc7d73', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_GSA_MD_20250721_ANV5_202508052101'), - 
bqsrc('38abec69', 'CCDG_Broad_NP_Epilepsy_USAMGH_MGBB_HMB_MDS_WES_20250721_ANV5_202507300852'), - bqsrc('aa634284', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_GSA_MD_20250730_ANV5_202508061225'), - bqsrc('f715714d', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_GSA_MD_20250721_ANV5_202508052109'), - bqsrc('0fef9408', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_NPU_WES_20250721_ANV5_202507300900'), - bqsrc('d2819933', 'CCDG_Broad_NP_Epilepsy_USAMON_GRU_WES_20250721_ANV5_202507300906'), - bqsrc('749bc2ed', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_NPU_MDS_GSA_MD_20250721_ANV5_202508052116'), - bqsrc('8951be0e', 'CCDG_Broad_NP_Epilepsy_USAMON_HMB_WES_20250721_ANV5_202507300913'), - bqsrc('2d5171ac', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_GSA_MD_20250721_ANV5_202508052124'), - bqsrc('e358235d', 'CCDG_Broad_NP_Epilepsy_USAMSS_DS_EP_NEURO_MDS_WES_20250721_ANV5_202507300921'), - bqsrc('9277558d', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_GSA_MD_20250721_ANV5_202508052131'), - bqsrc('e4cb6e34', 'CCDG_Broad_NP_Epilepsy_USANCH_DS_NEURO_MDS_WES_20250721_ANV5_202507300929'), - bqsrc('b1bb610a', 'CCDG_Broad_NP_Epilepsy_USAUPN_CHOP_GRU_GSA_MD_20250721_ANV5_202508052139'), - bqsrc('9e68cf90', 'CCDG_Broad_NP_Epilepsy_USAUPN_CHOP_GRU_NPU_GSA_MD_20250721_ANV5_202508052147'), - bqsrc('cb408fa3', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_NPU_WES_20250721_ANV5_202507300937'), - bqsrc('809be4ae', 'CCDG_Broad_NP_Epilepsy_USAUPN_Marsh_GRU_WES_20250721_ANV5_202507300944'), - bqsrc('3cce2504', 'CCDG_Broad_NP_Epilepsy_USAUPN_Penn_GRU_GSA_MD_20250721_ANV5_202508052154'), - bqsrc('3ca6cd04', 'CCDG_Broad_NP_Epilepsy_USAUPN_Rader_GRU_WES_20250721_ANV5_202507300951'), - bqsrc('b2659034', 'CCDG_Broad_NP_Epilepsy_USAVANControls_HMB_GSO_GSA_MD_20250721_ANV5_202508052211'), - bqsrc('cf9e28f1', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_GSA_MD_20250721_ANV5_202508052202'), - bqsrc('11a5f960', 'CCDG_Broad_NP_Epilepsy_USAVAN_HMB_GSO_WES_20250721_ANV5_202507300959'), - # Supersedes snapshot ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes_… popped below - bqsrc('ce5b4d0e', 'CCDG_Broad_NP_Epilepsy_USAVANcontrols_HMB_GSO_WES_20250721_ANV5_202507301007'), - bqsrc('84b1d212', 'CCDG_Broad_NP_Epilepsy_ZAFAGN_DS_EPI_COMO_MDS_GSA_MD_20250721_ANV5_202508052220'), - # Supersedes snapshot ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes… popped below - bqsrc('4b531498', 'CCDG_Broad_NP_Epilepsy_ZAFAGN_DS_EPI_COMO_MDS_WES_20250721_ANV5_202507301017'), - bqsrc('bdc5f5a9', 'CCDG_Broad_Spalletta_HMB_NPU_MDS_WES_20250721_ANV5_202507301024'), - bqsrc('2b98851b', 'CMG_Yale_GRU_20221020_ANV5_202507091800'), - bqsrc('83ab11a0', 'CMH_GAFK_R5_20250801_ANV5_202508011248'), - bqsrc('e5c7dfdd', 'ENCORE_293T_20250710_ANV5_202507211509'), - bqsrc('43e71067', 'ENCORE_RS293_20250710_ANV5_202507211507'), - bqsrc('262cd5df', 'GREGOR_R03_GRU_20250612_ANV5_202506271443'), - bqsrc('a50f51b8', 'GREGOR_R03_HMB_20250612_ANV5_202506271503'), - bqsrc('fe5fb412', 'GTEx_v10_hg38_20241105_ANV5_202506201300'), - bqsrc('e6508a35', 'HudsonAlpha_LR_v1_GRU_20241018_ANV5_202507091815'), - bqsrc('4ed55ce5', 'IGVF_HMB_MDS_R1_20250801_ANV5_202508011316'), - bqsrc('34477ca5', 'MAGE_20250710_ANV5_202507211510'), - bqsrc('027605b3', 'NIA_CARD_LR_WGS_HBCC_20250731_ANV5_202508011421'), - bqsrc('31f7c3dd', 'NIA_CARD_LR_WGS_NABEC_GRU_V2_20250731_ANV5_202508011423'), - bqsrc('04b6f4d8', 'PAGE_BioMe_GRU_WGS_20250224_ANV5_202502241731'), - bqsrc('af4c978f', 'PAGE_MEC_GRU_WGS_20250224_ANV5_202502241739'), - bqsrc('71b74bcf', 'PAGE_Stanford_Global_Reference_Panel_GRU_WGS_20250224_ANV5_202502241745'), - 
bqsrc('ff012258', 'ccdg_broad_np_epilepsy_usavancontrols_hmb_gso_wes_20221101_ANV5_202409302105', pop), - bqsrc('61b6b42b', 'ccdg_broad_np_epilepsy_zafagn_ds_epi_como_mds_wes_20221026_ANV5_202409302116', pop), -])) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - # Set variables for the `hammerbox` deployment here. The hammerbox is - # used to run integration tests against PRs and to perform CI/CD - # experiments. - # - # You can use this file as a template for a personal deployment. Look - # for conditionals using the `is_sandbox` variable and adjust the `else` - # branch accordingly. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. Settings that are applicable to all environments but - # specific to you go into `environment.local.py` at the project root. - - # When using this file as a template for a personal deployment, replace - # `None` with a short string that is specific to YOU. - # - 'AZUL_DEPLOYMENT_STAGE': 'hammerbox' if is_sandbox else None, - - # This deployment uses a subdomain of the `anvilprod` deployment's - # domain. - # - 'AZUL_DOMAIN_NAME': 'explore.anvilproject.org', - 'AZUL_SUBDOMAIN_TEMPLATE': '*.{AZUL_DEPLOYMENT_STAGE}', - - 'AZUL_CATALOGS': base64.b64encode(bz2.compress(json.dumps({ - f'{catalog}{suffix}': dict(atlas=atlas, - internal=internal, - plugins=dict(metadata=dict(name='anvil'), - repository=dict(name='tdr_anvil')), - sources=list(filter(None, sources.values()))) - for atlas, catalog, sources in [ - ('anvil', 'anvil9', anvil9_sources), - ('anvil', 'anvil11', anvil11_sources), - ] - for suffix, internal in [ - ('', False), - ('-it', True) - ] - }).encode())).decode('ascii'), - - 'AZUL_TDR_SOURCE_LOCATION': 'us-central1', - 'AZUL_TDR_SERVICE_URL': 'https://data.terra.bio', - 'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-prod.broadinstitute.org', - 'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-prod.broadinstitute.org', - 'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-prod.broadinstitute.org', - - # Personal deployments & `hammerbox` share an ES domain with `anvilprod` - 'AZUL_SHARE_ES_DOMAIN': '1', - 'AZUL_ES_DOMAIN': 'azul-index-anvilprod', - - **( - { - } if is_sandbox else { - # Personal deployments use fewer Lambda invocations in parallel. - 'AZUL_CONTRIBUTION_CONCURRENCY': '8', - 'AZUL_AGGREGATION_CONCURRENCY': '8', - } - ), - - 'AZUL_DEBUG': '1', - - 'AZUL_BILLING': 'anvil', - - # When using this file as a template for a personal deployment, change - # `None` to a string containing YOUR email address. 
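The docstring above prescribes a two-phase scheme: every relevant `environment.py` and `environment.local.py` is merged first, entries whose value is `None` are dropped, and only then are `{FOO}` references resolved against the compiled result. A minimal single-pass sketch of that order, where `compile_env` and the sample variables are hypothetical illustrations rather than Azul's actual loader::

    from typing import Optional

    def compile_env(*envs: dict[str, Optional[str]]) -> dict[str, str]:
        # Later, more specific files override earlier ones.
        merged: dict[str, Optional[str]] = {}
        for e in envs:
            merged.update(e)
        # Entries with a None value are excluded from the environment.
        defined = {k: v for k, v in merged.items() if v is not None}
        # `{FOO}` references are resolved only after the merge, so a
        # reference may point at a value supplied by a more specific file.
        # (Single pass only; the real loader's handling of chained
        # references and literal braces is not modeled here.)
        return {k: v.format_map(defined) for k, v in defined.items()}

    base = {'AZUL_OWNER': None, 'AZUL_MONITORING_EMAIL': '{AZUL_OWNER}'}
    local = {'AZUL_OWNER': 'you@example.org'}
    assert compile_env(base, local)['AZUL_MONITORING_EMAIL'] == 'you@example.org'

Resolving after the merge is what lets `AZUL_MONITORING_EMAIL` above be set to `'{AZUL_OWNER}'` even though the owner value is only supplied elsewhere.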
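`AZUL_CATALOGS` above is the catalog mapping JSON-encoded, bz2-compressed and base64-encoded, with an internal `-it` twin generated for each catalog. Inverting those three steps is enough to inspect the value; a small round-trip sketch, where the payload is a stand-in rather than the real catalog::

    import base64
    import bz2
    import json

    def decode_catalogs(encoded: str) -> dict:
        # Invert the expression above: base64 -> bz2 -> JSON.
        return json.loads(bz2.decompress(base64.b64decode(encoded)))

    encoded = base64.b64encode(bz2.compress(json.dumps(
        {'anvil9-it': {}}).encode())).decode('ascii')
    assert decode_catalogs(encoded) == {'anvil9-it': {}}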
- # - 'AZUL_OWNER': 'hannes@ucsc.edu' if is_sandbox else None, - - 'AZUL_MONITORING_EMAIL': '{AZUL_OWNER}', - - 'AZUL_AWS_ACCOUNT_ID': '465330168186', - 'AWS_DEFAULT_REGION': 'us-east-1', - - 'GOOGLE_PROJECT': 'platform-anvil-prod', - - 'AZUL_DEPLOYMENT_INCARNATION': '0', - - 'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '1055427471534-ienqfvg4fl2n7agcumn63b12npsjorlr.apps.googleusercontent.com', - - 'AZUL_ENABLE_VERBATIM_RELATIONS': '0', - } diff --git a/deployments/prod.browser/environment.py b/deployments/prod.browser/environment.py deleted file mode 100644 index 753a0e6d03..0000000000 --- a/deployments/prod.browser/environment.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - 'azul_terraform_component': 'browser', - 'azul_browser_sites': json.dumps({ - 'browser': { - 'zone': 'explore.data.humancellatlas.org', - 'domain': 'explore.data.humancellatlas.org', - 'project': 'ucsc/data-browser', - 'branch': 'ucsc/hca/prod', - 'tarball_name': 'hca', - 'tarball_path': 'out', - 'real_path': '' - }, - 'lungmap': { - 'zone': 'data-browser.lungmap.net', - 'domain': 'data-browser.lungmap.net', - 'project': 'ucsc/data-browser', - 'branch': 'ucsc/lungmap/prod', - 'tarball_name': 'lungmap', - 'tarball_path': 'out', - 'real_path': '' - } - }) - } diff --git a/deployments/prod.gitlab/environment.py b/deployments/prod.gitlab/environment.py deleted file mode 100644 index 32fc2f33d5..0000000000 --- a/deployments/prod.gitlab/environment.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections.abc import ( - Mapping, -) -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. 
- """ - return { - 'azul_terraform_component': 'gitlab', - 'azul_vpc_cidr': '172.22.0.0/16', - 'azul_vpn_subnet': '10.43.0.0/16' - } diff --git a/deployments/prod.shared/environment.py b/deployments/prod.shared/environment.py deleted file mode 120000 index 1a13b702a8..0000000000 --- a/deployments/prod.shared/environment.py +++ /dev/null @@ -1 +0,0 @@ -../dev.shared/environment.py \ No newline at end of file diff --git a/deployments/prod/.example.environment.local.py b/deployments/prod/.example.environment.local.py deleted file mode 120000 index 45b1b10142..0000000000 --- a/deployments/prod/.example.environment.local.py +++ /dev/null @@ -1 +0,0 @@ -../dev/.example.environment.local.py \ No newline at end of file diff --git a/deployments/prod/environment.py b/deployments/prod/environment.py deleted file mode 100644 index 949274ef2a..0000000000 --- a/deployments/prod/environment.py +++ /dev/null @@ -1,1878 +0,0 @@ -import base64 -import bz2 -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Literal, - Optional, -) - -pop = 1 # remove snapshot - - -def mksrc(source_type: Literal['bigquery', 'parquet'], - google_project, - snapshot, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - _, env, project, _ = snapshot.split('_', 3) - assert flags <= pop - source = None if flags & pop else ':'.join([ - 'tdr', - source_type, - 'gcp', - google_project, - snapshot, - prefix - ]) - return project, source - - -def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]: - result = dict(items) - assert len(items) == len(result), 'collisions detected' - assert list(result.keys()) == sorted(result.keys()), 'input not sorted' - return result - - -def mklist(catalog: dict[str, str]) -> list[str]: - return list(filter(None, catalog.values())) - - -def mkdict(previous_catalog: dict[str, str], - num_expected: int, - delta: dict[str, str] - ) -> dict[str, str]: - catalog = previous_catalog | delta - num_actual = len(mklist(catalog)) - assert num_expected == num_actual, (num_expected, num_actual) - return catalog - - -dcp12_sources = mkdict({}, 195, mkdelta([ - mksrc('bigquery', 'datarepo-a1c89fba', 'hca_prod_005d611a14d54fbf846e571a1f874f70__20220111_dcp2_20220113_dcp12'), - mksrc('bigquery', 'datarepo-a9316414', 'hca_prod_027c51c60719469fa7f5640fe57cbece__20220110_dcp2_20220113_dcp12'), - mksrc('bigquery', 'datarepo-d111fe96', 'hca_prod_03c6fce7789e4e78a27a664d562bb738__20220110_dcp2_20220113_dcp12'), - mksrc('bigquery', 'datarepo-a2d29140', 'hca_prod_04ad400c58cb40a5bc2b2279e13a910b__20220114_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-d8ad6862', 'hca_prod_05657a599f9d4bb9b77b24be13aa5cea__20220110_dcp2_20220113_dcp12'), - mksrc('bigquery', 'datarepo-c9b9a2e8', 'hca_prod_05be4f374506429bb112506444507d62__20220107_dcp2_20220113_dcp12'), - mksrc('bigquery', 'datarepo-4e087937', 'hca_prod_07073c1280064710a00b23abdb814904__20220107_dcp2_20220113_dcp12'), - mksrc('bigquery', 'datarepo-9226064c', 'hca_prod_074a9f88729a455dbca50ce80edf0cea__20220107_dcp2_20220113_dcp12'), - mksrc('bigquery', 'datarepo-5bd98333', 'hca_prod_0792db3480474e62802c9177c9cd8e28__20220107_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-580db83c', 'hca_prod_08b794a0519c4516b184c583746254c5__20220107_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-0b49ea1e', 'hca_prod_091cf39b01bc42e59437f419a66c8a45__20220107_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-109db6e4', 'hca_prod_0c09fadee0794fde8e606725b8c1d84b__20220107_dcp2_20220114_dcp12'), - mksrc('bigquery', 
'datarepo-26de5247', 'hca_prod_0c3b7785f74d40918616a68757e4c2a8__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-ae49a863', 'hca_prod_0d4b87ea6e9e456982e41343e0e3259f__20220110_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-76169feb', 'hca_prod_0fd8f91862d64b8bac354c53dd601f71__20220110_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-4b42c4ef', 'hca_prod_116965f3f09447699d28ae675c1b569c__20220107_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-eb39c36f', 'hca_prod_16ed4ad8731946b288596fe1c1d73a82__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-982c56ad', 'hca_prod_1c6a960d52ac44eab728a59c7ab9dc8e__20220110_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-f24e8394', 'hca_prod_1cd1f41ff81a486ba05b66ec60f81dcf__20220107_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-b8ffd379', 'hca_prod_1ce3b3dc02f244a896dad6d107b27a76__20220107_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-b1ac3907', 'hca_prod_1defdadaa36544ad9b29443b06bd11d6__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-4e5e9f9b', 'hca_prod_2043c65a1cf84828a6569e247d4e64f1__20220111_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-156c78f4', 'hca_prod_2084526ba66f4c40bb896fd162f2eb38__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-228ac7b7', 'hca_prod_2086eb0510b9432bb7f0169ccc49d270__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-7defc353', 'hca_prod_20f37aafcaa140e69123be6ce8feb2d6__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-783bc6c3', 'hca_prod_21ea8ddb525f4f1fa82031f0360399a2__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-d8b00524', 'hca_prod_23587fb31a4a4f58ad74cc9a4cb4c254__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-8390f5e3', 'hca_prod_248fcf0316c64a41b6ccaad4d894ca42__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-45f08380', 'hca_prod_24c654a5caa5440a8f02582921f2db4a__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-ab44f4d8', 'hca_prod_2a64db431b554639aabb8dba0145689d__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-bfdde7e3', 'hca_prod_2a72a4e566b2405abb7c1e463e8febb0__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-f4d7c97e', 'hca_prod_2ad191cdbd7a409b9bd1e72b5e4cce81__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-e4d77c97', 'hca_prod_2af52a1365cb4973b51339be38f2df3f__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-aebdd74a', 'hca_prod_2b38025da5ea4c0fb22e367824bcaf4c__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-e67b97d4', 'hca_prod_2d8460958a334f3c97d4585bafac13b4__20220111_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-b123707e', 'hca_prod_2ef3655a973d4d699b4121fa4041eed7__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-3b845979', 'hca_prod_2f67614380c24bc6b7b42613fe0fadf0__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-40cecf86', 'hca_prod_3089d311f9ed44ddbb10397059bad4dc__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-e6d0e6ab', 'hca_prod_31887183a72c43089eacc6140313f39c__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-059455a6', 'hca_prod_34cba5e9ecb14d81bf0848987cd63073__20220111_dcp2_20220114_dcp12'), - mksrc('bigquery', 'datarepo-18838720', 'hca_prod_376a7f55b8764f609cf3ed7bc83d5415__20220111_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-002f293a', 'hca_prod_379ed69ebe0548bcaf5ea7fc589709bf__20220111_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-94ea8d84', 
'hca_prod_38449aea70b540db84b31e08f32efe34__20220111_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-597059bb', 'hca_prod_38e44dd0c3df418e9256d0824748901f__20220112_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-9b80ca5d', 'hca_prod_3a69470330844ece9abed935fd5f6748__20220112_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-caef7414', 'hca_prod_3c27d2ddb1804b2bbf05e2e418393fd1__20220112_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-d091ac22', 'hca_prod_3cfcdff5dee14a7ba591c09c6e850b11__20220112_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-ab983bdd', 'hca_prod_3e329187a9c448ec90e3cc45f7c2311c__20220112_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-5e5bce33', 'hca_prod_4037007b0eff4e6db7bd8dd8eec80143__20220112_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-c6ce3ced', 'hca_prod_403c3e7668144a2da5805dd5de38c7ff__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-d2fa6418', 'hca_prod_414accedeba0440fb721befbc5642bef__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-3ae19ddb', 'hca_prod_41fb1734a121461695c73b732c9433c7__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-50081b3c', 'hca_prod_42d4f8d454224b78adaee7c3c2ef511c__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-a7e55305', 'hca_prod_455b46e6d8ea4611861ede720a562ada__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-99250e4a', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-d1983cfc', 'hca_prod_4af795f73e1d4341b8674ac0982b9efd__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-21212245', 'hca_prod_4bec484dca7a47b48d488830e06ad6db__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-001a2f34', 'hca_prod_4d6f6c962a8343d88fe10f53bffd4674__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-26396466', 'hca_prod_4e6f083b5b9a439398902a83da8188f1__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-3ef66093', 'hca_prod_50151324f3ed435898afec352a940a61__20220113_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-bd1c5759', 'hca_prod_504e0cee168840fab936361c4a831f87__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-6ab76705', 'hca_prod_5116c0818be749c58ce073b887328aa9__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-458232e4', 'hca_prod_51f02950ee254f4b8d0759aa99bb3498__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-3e19670d', 'hca_prod_520afa10f9d24e93ab7a26c4c863ce18__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-3eec204e', 'hca_prod_52b29aa4c8d642b4807ab35be94469ca__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-e7c01a93', 'hca_prod_52d10a60c8d14d068a5eaf0d5c0d5034__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-3b0847fe', 'hca_prod_53c53cd481274e12bc7f8fe1610a715c__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-d4f43fb3', 'hca_prod_54aaa409dc2848c5be26d368b4a5d5c6__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-6ed675f9', 'hca_prod_559bb888782941f2ace52c05c7eb81e9__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-5bdba230', 'hca_prod_56e73ccb7ae94faea738acfb69936d7a__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-6b1109e5', 'hca_prod_577c946d6de54b55a854cd3fde40bff2__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-d6e79c46', 'hca_prod_58028aa80ed249cab60f15e2ed5989d5__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-8494da48', 
'hca_prod_591af954cdcd483996d3a0d1b1e885ac__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-17088287', 'hca_prod_5b3285614a9740acb7ad6a90fc59d374__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-4977894a', 'hca_prod_5b5f05b72482468db76d8f68c04a7a47__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-99725d7d', 'hca_prod_5bb1f67e2ff04848bbcf17d133f0fd2d__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-83783d1c', 'hca_prod_5eafb94b02d8423e81b83673da319ca0__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-f25df8f2', 'hca_prod_5ee710d7e2d54fe2818d15f5e31dae32__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-99348797', 'hca_prod_602628d7c03848a8aa97ffbb2cb44c9d__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-e8e29a46', 'hca_prod_6072616c87944b208f52fb15992ea5a4__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-bd224cce', 'hca_prod_60ea42e1af4942f58164d641fdb696bc__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-a4f706c9', 'hca_prod_63b5b6c1bbcd487d8c2e0095150c1ecd__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-0e494119', 'hca_prod_65858543530d48a6a670f972b34dfe10__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-c8ed0e98', 'hca_prod_67a3de0945b949c3a068ff4665daa50e__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-b1223d0f', 'hca_prod_68df3629d2d24eedb0aba10e0f019b88__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-b7734519', 'hca_prod_6c040a938cf84fd598de2297eb07e9f6__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-489f5a00', 'hca_prod_7027adc6c9c946f384ee9badc3a4f53b__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-465f2c7c', 'hca_prod_71436067ac414acebe1b2fbcc2cb02fa__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-574f6410', 'hca_prod_71eb5f6dcee04297b503b1125909b8c7__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-61e3e2d1', 'hca_prod_74493e9844fc48b0a58fcc7e77268b59__20220117_dcp2_20220120_dcp12'), - mksrc('bigquery', 'datarepo-699bbe9b', 'hca_prod_74b6d5693b1142efb6b1a0454522b4a0__20220117_dcp2_20220124_dcp12'), - mksrc('bigquery', 'datarepo-674de9c8', 'hca_prod_75dbbce90cde489c88a793e8f92914a3__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-d043e30f', 'hca_prod_769a08d1b8a44f1e95f76071a9827555__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-5c757273', 'hca_prod_783c9952a4ae4106a6ce56f20ce27f88__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-bc095feb', 'hca_prod_7880637a35a14047b422b5eac2a2a358__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-333e09de', 'hca_prod_78b2406dbff246fc8b6120690e602227__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-9268e5a3', 'hca_prod_79b13a2a9ca142a497bd70208a11bea6__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-93812eed', 'hca_prod_7ac8822c4ef04194adf074290611b1c6__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-db3813a8', 'hca_prod_7adede6a0ab745e69b67ffe7466bec1f__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-33a60e82', 'hca_prod_7b947aa243a74082afff222a3e3a4635__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-ccf60635', 'hca_prod_7c75f07c608d4c4aa1b7b13d11c0ad31__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-30e31b57', 'hca_prod_8185730f411340d39cc3929271784c2b__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-9d5ab6f0', 
'hca_prod_83f5188e3bf749569544cea4f8997756__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-57af0017', 'hca_prod_842605c7375a47c59e2ca71c2c00fcad__20220117_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-c3aea89c', 'hca_prod_8559a8ed5d8c4fb6bde8ab639cebf03c__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-a054435f', 'hca_prod_8787c23889ef4636a57d3167e8b54a80__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-4d40e3cb', 'hca_prod_87d52a86bdc7440cb84d170f7dc346d9__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-486fab06', 'hca_prod_88ec040b87054f778f41f81e57632f7d__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-32fc3ac7', 'hca_prod_8999b4566fa6438bab17b62b1d8ec0c3__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-19e9b807', 'hca_prod_8a40ff19e6144c50b23b5c9e1d546bab__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-a71cbef5', 'hca_prod_8ab8726d81b94bd2acc24d50bee786b4__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-6cf8837e', 'hca_prod_8bd2e5f694534b9b9c5659e3a40dc87e__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8383e25b', 'hca_prod_8c3c290ddfff4553886854ce45f4ba7f__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-d425ceae', 'hca_prod_8d566d35d8d34975a351be5e25e9b2ea__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-c15b7397', 'hca_prod_8dacb243e9184bd2bb9aaac6dc424161__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8ed2742a', 'hca_prod_90bd693340c048d48d76778c103bf545__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-05d8344b', 'hca_prod_94023a08611d4f22a8c990956e091b2e__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-87faf2bd', 'hca_prod_946c5add47d1402a97bba5af97e8bce7__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8238f8f6', 'hca_prod_955dfc2ca8c64d04aa4d907610545d11__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-0f11337c', 'hca_prod_95f07e6e6a734e1ba880c83996b3aa5c__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-f680e590', 'hca_prod_962bd805eb894c54bad2008e497d1307__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-9b7aa7dd', 'hca_prod_99101928d9b14aafb759e97958ac7403__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8a2c2dfd', 'hca_prod_996120f9e84f409fa01e732ab58ca8b9__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-9385cdd8', 'hca_prod_9d97f01f9313416e9b07560f048b2350__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-ddcd2940', 'hca_prod_a004b1501c364af69bbd070c06dbc17d__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-16e78655', 'hca_prod_a29952d9925e40f48a1c274f118f1f51__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-9aa62158', 'hca_prod_a39728aa70a04201b0a281b7badf3e71__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-7180120b', 'hca_prod_a60803bbf7db45cfb52995436152a801__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-b4669bfd', 'hca_prod_a80a63f2e223489081b0415855b89abc__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-e899aaaa', 'hca_prod_a9301bebe9fa42feb75c84e8a460c733__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-373a5866', 'hca_prod_a96b71c078a742d188ce83c78925cfeb__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-92bd008d', 'hca_prod_a991ef154d4a4b80a93ec538b4b54127__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-6652ddcb', 
'hca_prod_a9c022b4c7714468b769cabcf9738de3__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-4975e16f', 'hca_prod_abe1a013af7a45ed8c26f3793c24a1f4__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-be8901be', 'hca_prod_ad04c8e79b7d4cceb8e901e31da10b94__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-4a3b719a', 'hca_prod_ad98d3cd26fb4ee399c98a2ab085e737__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-d7f6d5fa', 'hca_prod_ae71be1dddd84feb9bed24c3ddb6e1ad__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-58678d36', 'hca_prod_b32a9915c81b4cbcaf533a66b5da3c9a__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-ff628beb', 'hca_prod_b4a7d12f6c2f40a39e359756997857e3__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-2a48ce64', 'hca_prod_b51f49b40d2e4cbdbbd504cd171fc2fa__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-ffc43998', 'hca_prod_b7259878436c4274bfffca76f4cb7892__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-41c56298', 'hca_prod_b9484e4edc404e389b854cecf5b8c068__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-0cbe9f7b', 'hca_prod_b963bd4b4bc14404842569d74bc636b8__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-d4fa6f7e', 'hca_prod_bd40033154b94fccbff66bb8b079ee1f__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-5cfa0843', 'hca_prod_bd7104c9a950490e94727d41c6b11c62__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-53d134a5', 'hca_prod_c1810dbc16d245c3b45e3e675f88d87b__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8d82eeff', 'hca_prod_c1a9a93dd9de4e659619a9cec1052eaa__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-7357dda4', 'hca_prod_c31fa434c9ed4263a9b6d9ffb9d44005__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-4322539b', 'hca_prod_c4077b3c5c984d26a614246d12c2e5d7__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-c746ef64', 'hca_prod_c41dffbfad83447ca0e113e689d9b258__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-072807b7', 'hca_prod_c5ca43aa3b2b42168eb3f57adcbc99a1__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-62e87fe3', 'hca_prod_c5f4661568de4cf4bbc2a0ae10f08243__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-c59a45c5', 'hca_prod_c6ad8f9bd26a4811b2ba93d487978446__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-d0540cc6', 'hca_prod_c715cd2fdc7c44a69cd5b6a6d9f075ae__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-2b95c100', 'hca_prod_c893cb575c9f4f26931221b85be84313__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-dd3d8e06', 'hca_prod_cc95ff892e684a08a234480eca21ce79__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-2a5a6085', 'hca_prod_ccd1f1ba74ce469b9fc9f6faea623358__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-2b0f8836', 'hca_prod_ccef38d7aa9240109621c4c7b1182647__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8d6a8dd5', 'hca_prod_cddab57b68684be4806f395ed9dd635a__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-202827af', 'hca_prod_ce33dde2382d448cb6acbfb424644f23__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-cde2c4a4', 'hca_prod_ce7b12ba664f4f798fc73de6b1892183__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-4f711011', 'hca_prod_d012d4768f8c4ff389d6ebbe22c1b5c1__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-a718a79a', 
'hca_prod_d2111fac3fc44f429b6d32cd6a828267__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-12801bd1', 'hca_prod_d3446f0c30f34a12b7c36af877c7bb2d__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-dac6d601', 'hca_prod_d3a4ceac4d66498497042570c0647a56__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-1e99243c', 'hca_prod_d3ac7c1b53024804b611dad9f89c049d__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-e73ca25f', 'hca_prod_d7845650f6b14b1cb2fec0795416ba7b__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-7796a030', 'hca_prod_d7b7beae652b4fc09bf2bcda7c7115af__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-438137ee', 'hca_prod_da2747fa292142e0afd439ef57b2b88b__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-7f7fb2ac', 'hca_prod_daf9d9827ce643f6ab51272577290606__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-404d888e', 'hca_prod_dbcd4b1d31bd4eb594e150e8706fa192__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-ee4df1a4', 'hca_prod_dc1a41f69e0942a6959e3be23db6da56__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-89f05580', 'hca_prod_dd7f24360c564709bd17e526bba4cc15__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-d6e13093', 'hca_prod_df88f39f01a84b5b92f43177d6c0f242__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-319b223a', 'hca_prod_e0009214c0a04a7b96e2d6a83e966ce0__20220119_dcp2_20220126_dcp12'), - mksrc('bigquery', 'datarepo-cd37664c', 'hca_prod_e0c74c7a20a445059cf138dcdd23011b__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-921c7df9', 'hca_prod_e526d91dcf3a44cb80c5fd7676b55a1d__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8d441277', 'hca_prod_e57dc176ab98446b90c289e0842152fd__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-a2bba34f', 'hca_prod_e5d455791f5b48c3b568320d93e7ca72__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-32d08de8', 'hca_prod_e77fed30959d4fadbc15a0a5a85c21d2__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-de2f2f56', 'hca_prod_e8808cc84ca0409680f2bba73600cba6__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-b49ee748', 'hca_prod_eaefa1b6dae14414953b17b0427d061e__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-192f44d3', 'hca_prod_ede2e0b46652464fabbc0b2d964a25a0__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-79a515aa', 'hca_prod_ef1d9888fa8647a4bb720ab0f20f7004__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-8ddfa027', 'hca_prod_ef1e3497515e4bbe8d4c10161854b699__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-902930ad', 'hca_prod_efea6426510a4b609a19277e52bfa815__20220118_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-708835eb', 'hca_prod_f0f89c1474604bab9d4222228a91f185__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-cf6bd64d', 'hca_prod_f2fe82f044544d84b416a885f3121e59__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-44df5b5a', 'hca_prod_f48e7c39cc6740559d79bc437892840c__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-eb93ad96', 'hca_prod_f6133d2a9f3d4ef99c19c23d6c7e6cc0__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-e3c29b0f', 'hca_prod_f81efc039f564354aabb6ce819c3d414__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-11942c76', 'hca_prod_f83165c5e2ea4d15a5cf33f3550bffde__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-c64a357d', 
'hca_prod_f86f1ab41fbb4510ae353ffd752d4dfc__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-4167b729', 'hca_prod_f8aa201c4ff145a4890e840d63459ca2__20220119_dcp2_20220121_dcp12'), - mksrc('bigquery', 'datarepo-590e9f21', 'hca_prod_faeedcb0e0464be7b1ad80a3eeabb066__20220119_dcp2_20220121_dcp12'), -])) - -dcp13_sources = mkdict(dcp12_sources, 208, mkdelta([ - # @formatter:off - mksrc('bigquery', 'datarepo-c8f9ec5d', 'hca_prod_03c6fce7789e4e78a27a664d562bb738__20220110_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-991fac12', 'hca_prod_05657a599f9d4bb9b77b24be13aa5cea__20220110_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-b185fd85', 'hca_prod_05be4f374506429bb112506444507d62__20220107_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-30dc00be', 'hca_prod_065e6c13ad6b46a38075c3137eb03068__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-0285bfe0', 'hca_prod_06c7dd8d6cc64b79b7958805c47d36e1__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-bde655c7', 'hca_prod_102018327c7340339b653ef13d81656a__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-003ece01', 'hca_prod_1ce3b3dc02f244a896dad6d107b27a76__20220107_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-5a090360', 'hca_prod_1dddae6e375348afb20efa22abad125d__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-a0659f9b', 'hca_prod_1eb69a39b5b241ecafae5fe37f272756__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-cbdabbb4', 'hca_prod_23587fb31a4a4f58ad74cc9a4cb4c254__20220111_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-2ad5c040', 'hca_prod_2a72a4e566b2405abb7c1e463e8febb0__20220111_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-ca52c87a', 'hca_prod_2d8460958a334f3c97d4585bafac13b4__20220111_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-3da21f85', 'hca_prod_3a69470330844ece9abed935fd5f6748__20220112_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-f84c69b4', 'hca_prod_520afa10f9d24e93ab7a26c4c863ce18__20220117_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-fd9c289b', 'hca_prod_58028aa80ed249cab60f15e2ed5989d5__20220117_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-afff5936', 'hca_prod_67a3de0945b949c3a068ff4665daa50e__20220117_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-7e70b0df', 'hca_prod_6f89a7f38d4a4344aa4feccfe7e91076__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-cafbc244', 'hca_prod_78b2406dbff246fc8b6120690e602227__20220117_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-0558746b', 'hca_prod_78d7805bfdc8472b8058d92cf886f7a4__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-fb7a9fe5', 'hca_prod_8559a8ed5d8c4fb6bde8ab639cebf03c__20220118_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-5ee4d674', 'hca_prod_85a9263b088748edab1addfa773727b6__20220224_dcp2_20220224_dcp13'), - mksrc('bigquery', 'datarepo-604c0800', 'hca_prod_88ec040b87054f778f41f81e57632f7d__20220118_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-651b3c64', 'hca_prod_8c3c290ddfff4553886854ce45f4ba7f__20220118_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-9029753d', 'hca_prod_99101928d9b14aafb759e97958ac7403__20220118_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-0a0a2225', 'hca_prod_9c20a245f2c043ae82c92232ec6b594f__20220212_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-9385cdd8', 'hca_prod_9d97f01f9313416e9b07560f048b2350__20220118_dcp2_20220121_dcp12', pop), # noqa E501 - mksrc('bigquery', 'datarepo-3dda61fd', 
'hca_prod_ccd1f1ba74ce469b9fc9f6faea623358__20220118_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-021d07c6', 'hca_prod_ccef38d7aa9240109621c4c7b1182647__20220118_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-8c5ae0d1', 'hca_prod_cd61771b661a4e19b2696e5d95350de6__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-e69f2dd7', 'hca_prod_d6225aee8f0e4b20a20c682509a9ea14__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-b11dcc58', 'hca_prod_d71c76d336704774a9cf034249d37c60__20220213_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-e251e383', 'hca_prod_dbd836cfbfc241f0983441cc6c0b235a__20220212_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-ce17ac99', 'hca_prod_dd7ada843f144765b7ce9b64642bb3dc__20220212_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-8e3d7fce', 'hca_prod_e8808cc84ca0409680f2bba73600cba6__20220118_dcp2_20220214_dcp13'), - mksrc('bigquery', 'datarepo-43f772c9', 'hca_prod_f6133d2a9f3d4ef99c19c23d6c7e6cc0__20220119_dcp2_20220214_dcp13'), - # @formatter:on -])) - -dcp14_sources = mkdict(dcp13_sources, 218, mkdelta([ - mksrc('bigquery', 'datarepo-ef305f42', 'hca_prod_005d611a14d54fbf846e571a1f874f70__20220111_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-4fb4619a', 'hca_prod_074a9f88729a455dbca50ce80edf0cea__20220107_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-1dbff5cd', 'hca_prod_091cf39b01bc42e59437f419a66c8a45__20220107_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-73b30762', 'hca_prod_116965f3f09447699d28ae675c1b569c__20220107_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-ecd9f488', 'hca_prod_165dea71a95a44e188cdb2d9ad68bb1e__20220303_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-c3ca85db', 'hca_prod_24d0dbbc54eb49048141934d26f1c936__20220303_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-6eecb96e', 'hca_prod_2c041c26f75a495fab36a076f89d422a__20220303_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-99fdfa87', 'hca_prod_3cdaf942f8ad42e8a77b4efedb9ea7b6__20220303_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-cf90c331', 'hca_prod_403c3e7668144a2da5805dd5de38c7ff__20220113_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-b9918259', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-77f534b9', 'hca_prod_4bec484dca7a47b48d488830e06ad6db__20220113_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-b230b42b', 'hca_prod_4d6f6c962a8343d88fe10f53bffd4674__20220113_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-b83d5d98', 'hca_prod_4e6f083b5b9a439398902a83da8188f1__20220113_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-d7e92ae1', 'hca_prod_5116c0818be749c58ce073b887328aa9__20220117_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-9e63ca34', 'hca_prod_53c53cd481274e12bc7f8fe1610a715c__20220117_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-6b360d3f', 'hca_prod_5b5f05b72482468db76d8f68c04a7a47__20220117_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-47534f24', 'hca_prod_6ac8e777f9a04288b5b0446e8eba3078__20220303_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-aa6a9210', 'hca_prod_74b6d5693b1142efb6b1a0454522b4a0__20220117_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-7274c749', 'hca_prod_7b947aa243a74082afff222a3e3a4635__20220117_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-06d0218d', 'hca_prod_8185730f411340d39cc3929271784c2b__20220117_dcp2_20220307_dcp14'), - mksrc('bigquery', 'datarepo-958f743f', 'hca_prod_91af6e2f65f244ec98e0ba4e98db22c8__20220303_dcp2_20220307_dcp14'), - 
-    mksrc('bigquery', 'datarepo-8ef24363', 'hca_prod_95f07e6e6a734e1ba880c83996b3aa5c__20220118_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-bc66239d', 'hca_prod_abe1a013af7a45ed8c26f3793c24a1f4__20220118_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-ccddf7b7', 'hca_prod_b963bd4b4bc14404842569d74bc636b8__20220118_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-145862d0', 'hca_prod_c05184453b3b49c6b8fcc41daa4eacba__20220213_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-1d4ac83f', 'hca_prod_c211fd49d9804ba18c6ac24254a3cb52__20220303_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-a7ff96eb', 'hca_prod_c4077b3c5c984d26a614246d12c2e5d7__20220118_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-ff4ee826', 'hca_prod_c6a50b2a3dfd4ca89b483e682f568a25__20220303_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-15efafd9', 'hca_prod_cc95ff892e684a08a234480eca21ce79__20220118_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-264555df', 'hca_prod_e5d455791f5b48c3b568320d93e7ca72__20220119_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-9cbb67c6', 'hca_prod_f29b124a85974862ae98ff3a0fd9033e__20220303_dcp2_20220307_dcp14'),
-    mksrc('bigquery', 'datarepo-09a8dd1a', 'hca_prod_f83165c5e2ea4d15a5cf33f3550bffde__20220119_dcp2_20220307_dcp14'),
-]))
-
-dcp15_sources = mkdict(dcp14_sources, 237, mkdelta([
-    mksrc('bigquery', 'datarepo-bb0322f9', 'hca_prod_04ad400c58cb40a5bc2b2279e13a910b__20220114_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-4c006992', 'hca_prod_0562d2ae0b8a459ebbc06357108e5da9__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-625580ba', 'hca_prod_0777b9ef91f3468b9deadb477437aa1a__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-c6460226', 'hca_prod_18d4aae283634e008eebb9e568402cf8__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-9e1d30cd', 'hca_prod_1ce3b3dc02f244a896dad6d107b27a76__20220107_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-426125f5', 'hca_prod_2b38025da5ea4c0fb22e367824bcaf4c__20220111_dcp2_20220331_dcp15'),
-    mksrc('bigquery', 'datarepo-67ebf8c0', 'hca_prod_40272c3b46974bd4ba3f82fa96b9bf71__20220303_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-7e581d49', 'hca_prod_40ca2a03ec0f471fa834948199495fe7__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-4b461192', 'hca_prod_45c2c853d06f4879957ef1366fb5d423__20220303_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-b5a6fdd9', 'hca_prod_5116c0818be749c58ce073b887328aa9__20220117_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-abf80711', 'hca_prod_65d7a1684d624bc083244e742aa62de6__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-4a1d1031', 'hca_prod_6621c827b57a4268bc80df4049140193__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-ecd5ed43', 'hca_prod_6ac8e777f9a04288b5b0446e8eba3078__20220303_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-993d35db', 'hca_prod_6f89a7f38d4a4344aa4feccfe7e91076__20220213_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-fb756d63', 'hca_prod_73769e0a5fcd41f4908341ae08bfa4c1__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-b174a30e', 'hca_prod_77780d5603c0481faade2038490cef9f__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-54af5ab6', 'hca_prod_91af6e2f65f244ec98e0ba4e98db22c8__20220303_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-89b77174', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-c95907eb', 'hca_prod_99101928d9b14aafb759e97958ac7403__20220118_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-a186fcb1', 'hca_prod_a2a2f324cf24409ea859deaee871269c__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-b44b5550', 'hca_prod_a815c84b8999433f958e422c0720e00d__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-89acf5db', 'hca_prod_aefb919243fc46d7a4c129597f7ef61b__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-06565264', 'hca_prod_aff9c3cd6b844fc2abf2b9c0b3038277__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-0bb76e5c', 'hca_prod_c1810dbc16d245c3b45e3e675f88d87b__20220118_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-5d6926b7', 'hca_prod_c7c54245548b4d4fb15e0d7e238ae6c8__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-46a00828', 'hca_prod_dc1a41f69e0942a6959e3be23db6da56__20220119_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-e9f2b830', 'hca_prod_e255b1c611434fa683a8528f15b41038__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-c93c8ea6', 'hca_prod_f2fe82f044544d84b416a885f3121e59__20220119_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-d5d5cacf', 'hca_prod_fa3f460f4fb94cedb5488ba6a8ecae3f__20220330_dcp2_20220330_dcp15'),
-    mksrc('bigquery', 'datarepo-b60aabf3', 'hca_prod_fde199d2a8414ed1aa65b9e0af8969b1__20220330_dcp2_20220330_dcp15'),
-]))
-
-dcp16_sources = mkdict(dcp15_sources, 250, mkdelta([
-    mksrc('bigquery', 'datarepo-c531f177', 'hca_prod_0562d2ae0b8a459ebbc06357108e5da9__20220330_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-589be0ea', 'hca_prod_0b29914025b54861a69f7651ff3f46cf__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-a584f228', 'hca_prod_16cd67912adb4d0f82220184dada6456__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-84b4312d', 'hca_prod_18e5843776b740218ede3f0b443fa915__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-5ba935e0', 'hca_prod_2253ae594cc54bd2b44eecb6d3fd7646__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-bc2fe57d', 'hca_prod_24d0dbbc54eb49048141934d26f1c936__20220303_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-e227ee12', 'hca_prod_3cdaf942f8ad42e8a77b4efedb9ea7b6__20220303_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-3b6cd966', 'hca_prod_425c2759db664c93a358a562c069b1f1__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-26738b05', 'hca_prod_6663070ffd8b41a9a4792d1e07afa201__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-3dc96215', 'hca_prod_7b393e4d65bc4c03b402aae769299329__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-c6955be8', 'hca_prod_94e4ee099b4b410a84dca751ad36d0df__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-721e0608', 'hca_prod_b51f49b40d2e4cbdbbd504cd171fc2fa__20220118_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-9f724133', 'hca_prod_b733dc1b1d5545e380367eab0821742c__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-3403d8d8', 'hca_prod_c16a754f5da346ed8c1e6426af2ef625__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-54c4ef0b', 'hca_prod_daa371e81ec343ef924f896d901eab6f__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-190ddba0', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20220519_dcp16'),
-    mksrc('bigquery', 'datarepo-b3a12f99', 'hca_prod_f4d011ced1f548a4ab61ae14176e3a6e__20220519_dcp2_20220519_dcp16'),
-]))
-
-dcp17_sources = mkdict(dcp16_sources, 261, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-0c908bdf', 'hca_prod_005d611a14d54fbf846e571a1f874f70__20220111_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-91af9f1b', 'hca_prod_04ad400c58cb40a5bc2b2279e13a910b__20220114_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-adcbf4c3', 'hca_prod_18d4aae283634e008eebb9e568402cf8__20220330_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-4cc7b9fb', 'hca_prod_20f37aafcaa140e69123be6ce8feb2d6__20220111_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-b4210c33', 'hca_prod_2eb4f5f842a54368aa2d337bacb96197__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-ba2650b6', 'hca_prod_2fe3c60bac1a4c619b59f6556c0fce63__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-eb73a0f3', 'hca_prod_34da2c5f801148afa7fdad2f56ec10f4__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-0d3feb7c', 'hca_prod_376a7f55b8764f609cf3ed7bc83d5415__20220111_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-7cfb2129', 'hca_prod_3c27d2ddb1804b2bbf05e2e418393fd1__20220112_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-cbdb9b65', 'hca_prod_3cfcdff5dee14a7ba591c09c6e850b11__20220112_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-c1f0a228', 'hca_prod_425c2759db664c93a358a562c069b1f1__20220519_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-261ff5ff', 'hca_prod_4f17edf6e9f042afa54af02fdca76ade__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-c9a47174', 'hca_prod_5b5f05b72482468db76d8f68c04a7a47__20220117_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-7dd487af', 'hca_prod_5bb1f67e2ff04848bbcf17d133f0fd2d__20220117_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-c6191eb9', 'hca_prod_6f89a7f38d4a4344aa4feccfe7e91076__20220213_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-8b966ec9', 'hca_prod_71436067ac414acebe1b2fbcc2cb02fa__20220117_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-3a9d88c9', 'hca_prod_7880637a35a14047b422b5eac2a2a358__20220117_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-63ab653d', 'hca_prod_78b2406dbff246fc8b6120690e602227__20220117_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-b79b6e00', 'hca_prod_7be050259972493a856f3342a8d1b183__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-7d012d70', 'hca_prod_8999b4566fa6438bab17b62b1d8ec0c3__20220118_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-385ef7e4', 'hca_prod_8a40ff19e6144c50b23b5c9e1d546bab__20220118_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-0339714f', 'hca_prod_8bd2e5f694534b9b9c5659e3a40dc87e__20220118_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-492bd104', 'hca_prod_a27dd61925ad46a0ae0c5c4940a1139b__20220606_dcp2_20220607_dcp17', pop),  # noqa E501
-    mksrc('bigquery', 'datarepo-bc83ab27', 'hca_prod_a2a2f324cf24409ea859deaee871269c__20220330_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-10a33a05', 'hca_prod_a62dae2ecd694d5cb5f84f7e8abdbafa__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-e3d0317e', 'hca_prod_a9f5323ace71471c9caf04cc118fd1d7__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-cd2ab73f', 'hca_prod_ad04c8e79b7d4cceb8e901e31da10b94__20220118_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-dcd2f9cf', 'hca_prod_aff9c3cd6b844fc2abf2b9c0b3038277__20220330_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-c9b6cc1c', 'hca_prod_b9484e4edc404e389b854cecf5b8c068__20220118_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-49083689', 'hca_prod_bd7104c9a950490e94727d41c6b11c62__20220118_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-c29ee607', 'hca_prod_c302fe54d22d451fa130e24df3d6afca__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-50fa4c1b', 'hca_prod_d138a1147df54f7d9ff1f79dfd2d428f__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-9810d23f', 'hca_prod_d3446f0c30f34a12b7c36af877c7bb2d__20220119_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-3171dab6', 'hca_prod_da2747fa292142e0afd439ef57b2b88b__20220119_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-693c392c', 'hca_prod_dbcd4b1d31bd4eb594e150e8706fa192__20220119_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-b9410272', 'hca_prod_e993adcdd4ba4f889a05d1c05bdf0c45__20220606_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-4647591c', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-d20af009', 'hca_prod_f29b124a85974862ae98ff3a0fd9033e__20220303_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-959ea334', 'hca_prod_f6133d2a9f3d4ef99c19c23d6c7e6cc0__20220119_dcp2_20220607_dcp17'),
-    mksrc('bigquery', 'datarepo-a09e8946', 'hca_prod_fccd3f50cde247bf8972a293b5928aea__20220606_dcp2_20220607_dcp17'),
-    # @formatter:on
-]))
-
-dcp18_sources = mkdict(dcp17_sources, 267, mkdelta([
-    mksrc('bigquery', 'datarepo-3fa8ab06', 'hca_prod_1538d572bcb7426b8d2c84f3a7f87bb0__20220630_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-7a96c98c', 'hca_prod_8a666b76daaf4b1f9414e4807a1d1e8b__20220630_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-65814a19', 'hca_prod_9833669bd6944b93a3d06b6f9dbcfc10__20220630_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-935adc8a', 'hca_prod_9fc0064b84ce40a5a768e6eb3d364ee0__20220630_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-38233156', 'hca_prod_a27dd61925ad46a0ae0c5c4940a1139b__20220606_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-57050405', 'hca_prod_a62dae2ecd694d5cb5f84f7e8abdbafa__20220606_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-99178745', 'hca_prod_b4a7d12f6c2f40a39e359756997857e3__20220118_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-38191a90', 'hca_prod_b51f49b40d2e4cbdbbd504cd171fc2fa__20220118_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-0e7c311b', 'hca_prod_c4e1136978d44d29ba8eb67907c4c65c__20220630_dcp2_20220630_dcp18'),
-    mksrc('bigquery', 'datarepo-21969ae7', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20220630_dcp18'),
-]))
-
-dcp19_sources = mkdict(dcp18_sources, 276, mkdelta([
-    mksrc('bigquery', 'datarepo-f15e3b59', 'hca_prod_005d611a14d54fbf846e571a1f874f70__20220111_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-995a6952', 'hca_prod_04ad400c58cb40a5bc2b2279e13a910b__20220114_dcp2_20220805_dcp19'),
-    mksrc('bigquery', 'datarepo-d48e7a0f', 'hca_prod_0562d2ae0b8a459ebbc06357108e5da9__20220330_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-ca2968d6', 'hca_prod_05657a599f9d4bb9b77b24be13aa5cea__20220110_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-9c3b164c', 'hca_prod_074a9f88729a455dbca50ce80edf0cea__20220107_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-413da832', 'hca_prod_0777b9ef91f3468b9deadb477437aa1a__20220330_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-5b9d4163', 'hca_prod_0b29914025b54861a69f7651ff3f46cf__20220519_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-4e2997d7', 'hca_prod_135f7f5c4a854bcf9f7c4f035ff1e428__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-44258246', 'hca_prod_20f37aafcaa140e69123be6ce8feb2d6__20220111_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-7302c74c', 'hca_prod_24d0dbbc54eb49048141934d26f1c936__20220303_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-4ff95533', 'hca_prod_2c041c26f75a495fab36a076f89d422a__20220303_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-b839ef2e', 'hca_prod_2f67614380c24bc6b7b42613fe0fadf0__20220111_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-67845d12', 'hca_prod_34da2c5f801148afa7fdad2f56ec10f4__20220606_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-00aa7019', 'hca_prod_3c9d586ebd264b4686903faaa18ccf38__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-16ec881b', 'hca_prod_3e92c74d256c40cd927316f155da8342__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-4d2761c3', 'hca_prod_40272c3b46974bd4ba3f82fa96b9bf71__20220303_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-bc41fa3c', 'hca_prod_425c2759db664c93a358a562c069b1f1__20220519_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-6aadacef', 'hca_prod_45c2c853d06f4879957ef1366fb5d423__20220303_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-2ea93600', 'hca_prod_4d9d56e4610d4748b57df8315e3f53a3__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-a59dcc04', 'hca_prod_51f02950ee254f4b8d0759aa99bb3498__20220117_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-d07fd1c7', 'hca_prod_5b3285614a9740acb7ad6a90fc59d374__20220117_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-a0b7c8ae', 'hca_prod_5b5f05b72482468db76d8f68c04a7a47__20220117_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-465c9e6a', 'hca_prod_5b910a437fb54ea7b9d643dbd1bf2776__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-75be85e4', 'hca_prod_6ac8e777f9a04288b5b0446e8eba3078__20220303_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-8336318c', 'hca_prod_7be050259972493a856f3342a8d1b183__20220606_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-57425cd8', 'hca_prod_88ec040b87054f778f41f81e57632f7d__20220118_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-cc58cf4d', 'hca_prod_8f630e0f6bf94a04975402533152a954__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-c20ed7ec', 'hca_prod_91af6e2f65f244ec98e0ba4e98db22c8__20220303_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-071fb08c', 'hca_prod_94e4ee099b4b410a84dca751ad36d0df__20220519_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-2f190159', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-8cdacdcd', 'hca_prod_a1312f9a01ef40a789bf9091ca76a03a__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-36f14100', 'hca_prod_a62dae2ecd694d5cb5f84f7e8abdbafa__20220606_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-f0901ade', 'hca_prod_a9f5323ace71471c9caf04cc118fd1d7__20220606_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-b8655ece', 'hca_prod_c05184453b3b49c6b8fcc41daa4eacba__20220213_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-dd7e0cf4', 'hca_prod_c16a754f5da346ed8c1e6426af2ef625__20220519_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-3b8b21f1', 'hca_prod_c1a9a93dd9de4e659619a9cec1052eaa__20220118_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-9e1a0138', 'hca_prod_c5f4661568de4cf4bbc2a0ae10f08243__20220118_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-4af209c5', 'hca_prod_c7c54245548b4d4fb15e0d7e238ae6c8__20220330_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-4888a055', 'hca_prod_d2111fac3fc44f429b6d32cd6a828267__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-5af7a6fe', 'hca_prod_d3ac7c1b53024804b611dad9f89c049d__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-3c496a8c', 'hca_prod_d71c76d336704774a9cf034249d37c60__20220213_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-9b9df5aa', 'hca_prod_d7b7beae652b4fc09bf2bcda7c7115af__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-7769445f', 'hca_prod_da9d6f243bdf4eaa9e3ff47ce2a65b36__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-68a6d3c9', 'hca_prod_daf9d9827ce643f6ab51272577290606__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-ddce2a24', 'hca_prod_df88f39f01a84b5b92f43177d6c0f242__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-4f8a62b5', 'hca_prod_e0009214c0a04a7b96e2d6a83e966ce0__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-391955ef', 'hca_prod_e526d91dcf3a44cb80c5fd7676b55a1d__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-26d7f723', 'hca_prod_e57dc176ab98446b90c289e0842152fd__20220119_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-234ad9b8', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-125471a9', 'hca_prod_ec6476ee294941f3947b8eef41d6d3ac__20220729_dcp2_20220804_dcp19'),
-    mksrc('bigquery', 'datarepo-f91198ce', 'hca_prod_f86f1ab41fbb4510ae353ffd752d4dfc__20220119_dcp2_20220804_dcp19'),
-]))
-
-dcp20_sources = mkdict(dcp19_sources, 288, mkdelta([
-    mksrc('bigquery', 'datarepo-7a619b7f', 'hca_prod_34da2c5f801148afa7fdad2f56ec10f4__20220606_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-ab0fbc7f', 'hca_prod_425c2759db664c93a358a562c069b1f1__20220519_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-86ffb30f', 'hca_prod_4c73d1e4bad24a22a0ba55abbdbdcc3d__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-68f0a599', 'hca_prod_66d7d92ad6c5492c815bf81c7c93c984__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-999b7ef7', 'hca_prod_74e2ef9d7c9f418cb2817fb38f3b1571__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-d9e203e6', 'hca_prod_7be050259972493a856f3342a8d1b183__20220606_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-60b7f35f', 'hca_prod_7f351a4cd24c4fcd9040f79071b097d0__20220906_dcp2_20220909_dcp20'),
-    mksrc('bigquery', 'datarepo-3ef6c389', 'hca_prod_8b9cb6ae6a434e47b9fb3df7aeec941f__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-b6764413', 'hca_prod_923d323172954184b3f6c3082766a8c7__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-c341e9ae', 'hca_prod_9a23ac2d93dd4bac9bb8040e6426db9d__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-a07e8478', 'hca_prod_9ac53858606a4b89af49804ccedaa660__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-6d6c8d6e', 'hca_prod_9b876d3107394e969846f76e6a427279__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-4bad599c', 'hca_prod_a7c66eb14a4e4f6c9e30ad2a485f8301__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-b4a08996', 'hca_prod_a815c84b8999433f958e422c0720e00d__20220330_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-90b9fc26', 'hca_prod_ac289b77fb124a6bad43c0721c698e70__20220906_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-e2dfd11f', 'hca_prod_daf9d9827ce643f6ab51272577290606__20220119_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-bfc87e2c', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20220907_dcp20'),
-    mksrc('bigquery', 'datarepo-17b90eb5', 'hca_prod_fcaa53cdba574bfeaf9ceaa958f95c1a__20220906_dcp2_20220907_dcp20'),
-]))
-
-dcp21_sources = mkdict(dcp20_sources, 293, mkdelta([
-    mksrc('bigquery', 'datarepo-f8cc03ae', 'hca_prod_03c6fce7789e4e78a27a664d562bb738__20220110_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-1f11d8e0', 'hca_prod_0b29914025b54861a69f7651ff3f46cf__20220519_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-eeee85f1', 'hca_prod_1fa8b11f56fa45a6a7776af70e17a6b3__20220928_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-e2f09b06', 'hca_prod_34ec845bcd7a4c4399e4d2932d5d85bb__20220928_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-abf25e53', 'hca_prod_575c0ad9c78e469b9fdf9a68dd881137__20220928_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-41b246f0', 'hca_prod_615158205bb845d08d12f0850222ecf0__20221007_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-a5548d96', 'hca_prod_7f351a4cd24c4fcd9040f79071b097d0__20220906_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-ee2f9607', 'hca_prod_a62dae2ecd694d5cb5f84f7e8abdbafa__20220606_dcp2_20221011_dcp21'),
-    mksrc('bigquery', 'datarepo-ccacece4', 'hca_prod_c0d82ef215044ef09e5ed8a13e45fdec__20220928_dcp2_20221011_dcp21'),
-]))
-
-dcp22_sources = mkdict(dcp21_sources, 303, mkdelta([
-    mksrc('bigquery', 'datarepo-89e53cfa', 'hca_prod_0d4aaaac02c344c48ae04465f97f83ed__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-8d629004', 'hca_prod_16e9915978bc44aab47955a5e903bf50__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-69c9824a', 'hca_prod_21ea8ddb525f4f1fa82031f0360399a2__20220111_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-fe5ea9a7', 'hca_prod_2837165560ba449ea3035859b29ead65__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-96dc6466', 'hca_prod_2b81ecc46ee0438f8c5bc10b2464069e__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-330124fc', 'hca_prod_34c9a62ca6104e31b3438fb7be676f8c__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-31ee9b01', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-a03f3f9a', 'hca_prod_94023a08611d4f22a8c990956e091b2e__20220118_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-9c08dc57', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20221102_dcp22'),
-    mksrc('bigquery', 'datarepo-93c53553', 'hca_prod_990d251f6dab4a98a2b66cfe7e4708b9__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-93db2e27', 'hca_prod_9e3370a0144a49a99e926f6a9290125a__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-d0ed9366', 'hca_prod_c8e6c5d9fcde4845beadff96999e3051__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-a5bd2972', 'hca_prod_dcbb50d19acf4f709fdab1f63a948c49__20221101_dcp2_20221101_dcp22'),
-    mksrc('bigquery', 'datarepo-7e89d1a8', 'hca_prod_e4b18cd28f15490db9f1d118aa067dc3__20221101_dcp2_20221101_dcp22'),
-]))
-
-dcp23_sources = mkdict(dcp22_sources, 313, mkdelta([
-    mksrc('bigquery', 'datarepo-1a3040e1', 'hca_prod_0751843070314bdfa3ce1bf0917a1923__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-79dd7eb4', 'hca_prod_1ebe8c34454e4c28bd713a3e8b127be4__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-e0ff550c', 'hca_prod_258c5e15d1254f2d8b4ce3122548ec9b__20221208_dcp2_20221209_dcp23'),
-    mksrc('bigquery', 'datarepo-2a9467bc', 'hca_prod_29ed827bc5394f4cbb6bce8f9173dfb7__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-023be474', 'hca_prod_34ec845bcd7a4c4399e4d2932d5d85bb__20220928_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-5d5e636b', 'hca_prod_48b198ef3d594e57900fdf54c2435669__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-354e1286', 'hca_prod_504e0cee168840fab936361c4a831f87__20220117_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-fa6adf44', 'hca_prod_79351583b21244bab473731bdcddb407__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-678f9dd2', 'hca_prod_b7259878436c4274bfffca76f4cb7892__20220118_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-cde3a08e', 'hca_prod_cc95ff892e684a08a234480eca21ce79__20220118_dcp2_20221209_dcp23'),
-    mksrc('bigquery', 'datarepo-0fd803ec', 'hca_prod_cdc2d2706c99414288839bd95c041d05__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-c008323e', 'hca_prod_e0c74c7a20a445059cf138dcdd23011b__20220119_dcp2_20221209_dcp23'),
-    mksrc('bigquery', 'datarepo-e3e51223', 'hca_prod_e6773550c1a6494986431a3154cf2670__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-8d65fbe5', 'hca_prod_ea9eec5a4fc24c5894d02fcb598732bc__20221208_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-6a118cd6', 'hca_prod_f4d011ced1f548a4ab61ae14176e3a6e__20220519_dcp2_20221208_dcp23'),
-    mksrc('bigquery', 'datarepo-f7f5893b', 'hca_prod_fc381e70df1b407d813152ab523270bd__20221208_dcp2_20221208_dcp23')
-]))
-
-dcp24_sources = mkdict(dcp23_sources, 324, mkdelta([
-    mksrc('bigquery', 'datarepo-b7dfe3eb', 'hca_prod_12f320548f184dae8959bfce7e3108e7__20230201_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-27434eaf', 'hca_prod_2d4d89f2ebeb467cae60a3efc5e8d4ba__20230206_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-215ae20a', 'hca_prod_3ce9ae94c469419a96375d138a4e642f__20230201_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-7230d8d4', 'hca_prod_6e60a555fd954aa28e293ec2ef01a580__20230206_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-c5864eb0', 'hca_prod_77dedd59137648879bcadc42b56d5b7a__20230201_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-f3272b0a', 'hca_prod_8787c23889ef4636a57d3167e8b54a80__20220118_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-b1aa4336', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-83e0bc68', 'hca_prod_95d058bc9cec4c888d2c05b4a45bf24f__20230201_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-6a865365', 'hca_prod_cbd2911f252b4428abde69e270aefdfc__20230201_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-da0b7e39', 'hca_prod_cd9d6360ce38432197dff13c79e3cb84__20230206_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-19358e1b', 'hca_prod_cdabcf0b76024abf9afb3b410e545703__20230201_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-4582b46e', 'hca_prod_e57dc176ab98446b90c289e0842152fd__20220119_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-06c4cdf8', 'hca_prod_e88714c22e7849da81465a60b50628b4__20230206_dcp2_20230210_dcp24'),
-    mksrc('bigquery', 'datarepo-7e506a9c', 'hca_prod_f2078d5f2e7d48448552f7c41a231e52__20230201_dcp2_20230210_dcp24')
-]))
-
-dcp25_sources = mkdict(dcp24_sources, 333, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-3b7ecb2b', 'hca_prod_0562d2ae0b8a459ebbc06357108e5da9__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-23177a5e', 'hca_prod_065e6c13ad6b46a38075c3137eb03068__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-28f02436', 'hca_prod_0751843070314bdfa3ce1bf0917a1923__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2be1e691', 'hca_prod_0777b9ef91f3468b9deadb477437aa1a__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2ace314a', 'hca_prod_0b29914025b54861a69f7651ff3f46cf__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-620d39a2', 'hca_prod_0d4aaaac02c344c48ae04465f97f83ed__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-70e4eabb', 'hca_prod_0d4b87ea6e9e456982e41343e0e3259f__20220110_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7888c2ce', 'hca_prod_102018327c7340339b653ef13d81656a__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d023038c', 'hca_prod_135f7f5c4a854bcf9f7c4f035ff1e428__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-c5065d3a', 'hca_prod_1538d572bcb7426b8d2c84f3a7f87bb0__20220630_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7767d47a', 'hca_prod_165dea71a95a44e188cdb2d9ad68bb1e__20220303_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-673f4c49', 'hca_prod_1688d7cc6f5c49efb353e308b61d4e4c__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-9abcd921', 'hca_prod_16cd67912adb4d0f82220184dada6456__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-9b0d8d86', 'hca_prod_16e9915978bc44aab47955a5e903bf50__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-f59c20aa', 'hca_prod_16ed4ad8731946b288596fe1c1d73a82__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2b6bbe36', 'hca_prod_18d4aae283634e008eebb9e568402cf8__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-a85d395a', 'hca_prod_18e5843776b740218ede3f0b443fa915__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7ec15bbc', 'hca_prod_1dddae6e375348afb20efa22abad125d__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-8e5453fd', 'hca_prod_1eb69a39b5b241ecafae5fe37f272756__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-e55df4ed', 'hca_prod_1ebe8c34454e4c28bd713a3e8b127be4__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-8e161efb', 'hca_prod_1fa8b11f56fa45a6a7776af70e17a6b3__20220928_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-76ddb6a2', 'hca_prod_2084526ba66f4c40bb896fd162f2eb38__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-857c8f17', 'hca_prod_2253ae594cc54bd2b44eecb6d3fd7646__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-8e9e865c', 'hca_prod_235092021e3c49598a459c5b642a1066__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-634a63c7', 'hca_prod_258c5e15d1254f2d8b4ce3122548ec9b__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-c0287284', 'hca_prod_2837165560ba449ea3035859b29ead65__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-6a89dcba', 'hca_prod_2a64db431b554639aabb8dba0145689d__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-c215e7c1', 'hca_prod_2ad191cdbd7a409b9bd1e72b5e4cce81__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d4e011f2', 'hca_prod_2b38025da5ea4c0fb22e367824bcaf4c__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-529a323b', 'hca_prod_2b81ecc46ee0438f8c5bc10b2464069e__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-deaef690', 'hca_prod_2eb4f5f842a54368aa2d337bacb96197__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-49314a87', 'hca_prod_2f67614380c24bc6b7b42613fe0fadf0__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-573f4ced', 'hca_prod_2fe3c60bac1a4c619b59f6556c0fce63__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d8951cb5', 'hca_prod_3089d311f9ed44ddbb10397059bad4dc__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-b1d73b5d', 'hca_prod_34c9a62ca6104e31b3438fb7be676f8c__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-ede741aa', 'hca_prod_34ec845bcd7a4c4399e4d2932d5d85bb__20220928_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d4b1487a', 'hca_prod_379ed69ebe0548bcaf5ea7fc589709bf__20220111_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-06d8d7f5', 'hca_prod_38e44dd0c3df418e9256d0824748901f__20220112_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-497bd309', 'hca_prod_3c9d586ebd264b4686903faaa18ccf38__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-cf03259b', 'hca_prod_3cdaf942f8ad42e8a77b4efedb9ea7b6__20220303_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-b2aefc7f', 'hca_prod_3cfcdff5dee14a7ba591c09c6e850b11__20220112_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-cbd0d764', 'hca_prod_3e92c74d256c40cd927316f155da8342__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-5934cc66', 'hca_prod_403c3e7668144a2da5805dd5de38c7ff__20220113_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-8a262357', 'hca_prod_414accedeba0440fb721befbc5642bef__20220113_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-84e2745e', 'hca_prod_425c2759db664c93a358a562c069b1f1__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-0baf4047', 'hca_prod_45c2c853d06f4879957ef1366fb5d423__20220303_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-ff143887', 'hca_prod_48b198ef3d594e57900fdf54c2435669__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7bf7a66d', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-641f98b4', 'hca_prod_4bec484dca7a47b48d488830e06ad6db__20220113_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-61ddb23f', 'hca_prod_4c73d1e4bad24a22a0ba55abbdbdcc3d__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-616afd75', 'hca_prod_4d9d56e4610d4748b57df8315e3f53a3__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-fc5c976e', 'hca_prod_4ef86852aca04a9185229968e0e54dbe__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-10eb0685', 'hca_prod_4f17edf6e9f042afa54af02fdca76ade__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7964fe37', 'hca_prod_50151324f3ed435898afec352a940a61__20220113_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-82d6ee00', 'hca_prod_504e0cee168840fab936361c4a831f87__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-5fefa259', 'hca_prod_5116c0818be749c58ce073b887328aa9__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-8bb6f657', 'hca_prod_54aaa409dc2848c5be26d368b4a5d5c6__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-f194b1da', 'hca_prod_575c0ad9c78e469b9fdf9a68dd881137__20220928_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-102399d2', 'hca_prod_58028aa80ed249cab60f15e2ed5989d5__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-cbb1feac', 'hca_prod_591af954cdcd483996d3a0d1b1e885ac__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2917ceb6', 'hca_prod_5b3285614a9740acb7ad6a90fc59d374__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-46332822', 'hca_prod_5b910a437fb54ea7b9d643dbd1bf2776__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-04558655', 'hca_prod_5bb1f67e2ff04848bbcf17d133f0fd2d__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-6a083ad7', 'hca_prod_602628d7c03848a8aa97ffbb2cb44c9d__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-41cca7ce', 'hca_prod_60ea42e1af4942f58164d641fdb696bc__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-96f28c63', 'hca_prod_615158205bb845d08d12f0850222ecf0__20221007_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-4f72c082', 'hca_prod_65d7a1684d624bc083244e742aa62de6__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-6e86e38d', 'hca_prod_6621c827b57a4268bc80df4049140193__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-1a46eca7', 'hca_prod_6663070ffd8b41a9a4792d1e07afa201__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d9380f70', 'hca_prod_66d7d92ad6c5492c815bf81c7c93c984__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-9586eb0b', 'hca_prod_68df3629d2d24eedb0aba10e0f019b88__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-31de31e5', 'hca_prod_6c040a938cf84fd598de2297eb07e9f6__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-fe215496', 'hca_prod_6e522b939b704f0c9990b9cff721251b__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-bf5922e5', 'hca_prod_6f03e4ad93054bfaa5b6929ffb1d94bd__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-259b6fce', 'hca_prod_6f89a7f38d4a4344aa4feccfe7e91076__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-e3ce8d32', 'hca_prod_71eb5f6dcee04297b503b1125909b8c7__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-63751bc2', 'hca_prod_73769e0a5fcd41f4908341ae08bfa4c1__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-a7e215db', 'hca_prod_74e2ef9d7c9f418cb2817fb38f3b1571__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-0059cce8', 'hca_prod_769a08d1b8a44f1e95f76071a9827555__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-e62a1f78', 'hca_prod_78b2406dbff246fc8b6120690e602227__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-aad6282b', 'hca_prod_79351583b21244bab473731bdcddb407__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-6ffd3fa5', 'hca_prod_79b13a2a9ca142a497bd70208a11bea6__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-12a154cb', 'hca_prod_7ac8822c4ef04194adf074290611b1c6__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-ff62a88c', 'hca_prod_7b393e4d65bc4c03b402aae769299329__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d4e5780e', 'hca_prod_7b947aa243a74082afff222a3e3a4635__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-664a24cb', 'hca_prod_7c75f07c608d4c4aa1b7b13d11c0ad31__20220117_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-9caf17c8', 'hca_prod_8559a8ed5d8c4fb6bde8ab639cebf03c__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-3e85541c', 'hca_prod_8999b4566fa6438bab17b62b1d8ec0c3__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-a4f6e9ac', 'hca_prod_8b9cb6ae6a434e47b9fb3df7aeec941f__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d8991cff', 'hca_prod_8d566d35d8d34975a351be5e25e9b2ea__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-65954398', 'hca_prod_8f630e0f6bf94a04975402533152a954__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-3539412c', 'hca_prod_91af6e2f65f244ec98e0ba4e98db22c8__20220303_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-64e86c6c', 'hca_prod_923d323172954184b3f6c3082766a8c7__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-bd3072b5', 'hca_prod_955dfc2ca8c64d04aa4d907610545d11__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-1fc717b1', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-b7b5c053', 'hca_prod_990d251f6dab4a98a2b66cfe7e4708b9__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2736a43e', 'hca_prod_9ac53858606a4b89af49804ccedaa660__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-9f5be9ac', 'hca_prod_9b876d3107394e969846f76e6a427279__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-34fd3cd0', 'hca_prod_9e3370a0144a49a99e926f6a9290125a__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-78a0de43', 'hca_prod_a1312f9a01ef40a789bf9091ca76a03a__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-be58fa69', 'hca_prod_a39728aa70a04201b0a281b7badf3e71__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-11157f54', 'hca_prod_a60803bbf7db45cfb52995436152a801__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-278d9dd8', 'hca_prod_a62dae2ecd694d5cb5f84f7e8abdbafa__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7a531766', 'hca_prod_a7c66eb14a4e4f6c9e30ad2a485f8301__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-39471648', 'hca_prod_a80a63f2e223489081b0415855b89abc__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-1553d994', 'hca_prod_a9301bebe9fa42feb75c84e8a460c733__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d962a1b4', 'hca_prod_a991ef154d4a4b80a93ec538b4b54127__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-60416b5f', 'hca_prod_a9f5323ace71471c9caf04cc118fd1d7__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-f2f57a7c', 'hca_prod_ac289b77fb124a6bad43c0721c698e70__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-bbe8303d', 'hca_prod_ad04c8e79b7d4cceb8e901e31da10b94__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d67d5486', 'hca_prod_ae62bb3155ca4127b0fbb1771a604645__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2a19065b', 'hca_prod_aefb919243fc46d7a4c129597f7ef61b__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-ed809ac3', 'hca_prod_b7259878436c4274bfffca76f4cb7892__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-4c6b830c', 'hca_prod_b733dc1b1d5545e380367eab0821742c__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2285af8d', 'hca_prod_b9484e4edc404e389b854cecf5b8c068__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-ffeff93b', 'hca_prod_bd40033154b94fccbff66bb8b079ee1f__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-208fbdb6', 'hca_prod_bd7104c9a950490e94727d41c6b11c62__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-3daf9fbc', 'hca_prod_be010abcfb684581b61f7dd7c3d7b044__20230314_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-03ab12e5', 'hca_prod_c05184453b3b49c6b8fcc41daa4eacba__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-e798f7a3', 'hca_prod_c0d82ef215044ef09e5ed8a13e45fdec__20220928_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-3be98d08', 'hca_prod_c16a754f5da346ed8c1e6426af2ef625__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-097f9535', 'hca_prod_c1a9a93dd9de4e659619a9cec1052eaa__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-c674ec37', 'hca_prod_c211fd49d9804ba18c6ac24254a3cb52__20220303_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7978f7d7', 'hca_prod_c31fa434c9ed4263a9b6d9ffb9d44005__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-06d2f7a5', 'hca_prod_c4e1136978d44d29ba8eb67907c4c65c__20220630_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-cba6aa82', 'hca_prod_c5ca43aa3b2b42168eb3f57adcbc99a1__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-e7beed27', 'hca_prod_c6a50b2a3dfd4ca89b483e682f568a25__20220303_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-4c1109c3', 'hca_prod_c715cd2fdc7c44a69cd5b6a6d9f075ae__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-b9ed6937', 'hca_prod_c7c54245548b4d4fb15e0d7e238ae6c8__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-8b443018', 'hca_prod_c8e6c5d9fcde4845beadff96999e3051__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-cf75399d', 'hca_prod_cd61771b661a4e19b2696e5d95350de6__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-6e91dfce', 'hca_prod_cdc2d2706c99414288839bd95c041d05__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-ebcfd951', 'hca_prod_ce7b12ba664f4f798fc73de6b1892183__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2bf47b4d', 'hca_prod_d138a1147df54f7d9ff1f79dfd2d428f__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-fdb31a8e', 'hca_prod_d6225aee8f0e4b20a20c682509a9ea14__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-9f65205b', 'hca_prod_d71c76d336704774a9cf034249d37c60__20220213_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-94746fdf', 'hca_prod_d7845650f6b14b1cb2fec0795416ba7b__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-29fbe41f', 'hca_prod_d8ae869c39c24cddb3fc2d0d8f60e7b8__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-225f8649', 'hca_prod_da9d6f243bdf4eaa9e3ff47ce2a65b36__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-fc1febb7', 'hca_prod_daa371e81ec343ef924f896d901eab6f__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-6d15b6d0', 'hca_prod_daf9d9827ce643f6ab51272577290606__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-cf5ce794', 'hca_prod_dbcd4b1d31bd4eb594e150e8706fa192__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-a44da434', 'hca_prod_dc1a41f69e0942a6959e3be23db6da56__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-c3eee222', 'hca_prod_dcbb50d19acf4f709fdab1f63a948c49__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-5ba86b6e', 'hca_prod_dd7ada843f144765b7ce9b64642bb3dc__20220212_dcp2_20230314_dcp25', pop),  # noqa E501
-    mksrc('bigquery', 'datarepo-a4d35f23', 'hca_prod_dd7f24360c564709bd17e526bba4cc15__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7882d448', 'hca_prod_e0c74c7a20a445059cf138dcdd23011b__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-cf662b44', 'hca_prod_e255b1c611434fa683a8528f15b41038__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-fc908765', 'hca_prod_e4b18cd28f15490db9f1d118aa067dc3__20221101_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-410d6eb1', 'hca_prod_e57dc176ab98446b90c289e0842152fd__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d0d87d10', 'hca_prod_e6773550c1a6494986431a3154cf2670__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-4a7bcd86', 'hca_prod_e8808cc84ca0409680f2bba73600cba6__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-a8d7a228', 'hca_prod_e956e66aac8e483a963a0f92c7e5abfb__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-03f5da17', 'hca_prod_e993adcdd4ba4f889a05d1c05bdf0c45__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-aa5721d7', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-1c07a6e4', 'hca_prod_ea9eec5a4fc24c5894d02fcb598732bc__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-f01e417c', 'hca_prod_ec6476ee294941f3947b8eef41d6d3ac__20220729_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-b1b215b9', 'hca_prod_ede2e0b46652464fabbc0b2d964a25a0__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7bf9ffaf', 'hca_prod_ee166275f63a486481554df86c9de679__20230313_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d9d4043e', 'hca_prod_ef1d9888fa8647a4bb720ab0f20f7004__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-4cca88b5', 'hca_prod_ef1e3497515e4bbe8d4c10161854b699__20220118_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-d7d0cebe', 'hca_prod_f29b124a85974862ae98ff3a0fd9033e__20220303_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7949d5c1', 'hca_prod_f2fe82f044544d84b416a885f3121e59__20220119_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-10db03cf', 'hca_prod_f4d011ced1f548a4ab61ae14176e3a6e__20220519_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-2df3359d', 'hca_prod_fa3f460f4fb94cedb5488ba6a8ecae3f__20220330_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-7da03753', 'hca_prod_fc381e70df1b407d813152ab523270bd__20221208_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-c4cba2d7', 'hca_prod_fcaa53cdba574bfeaf9ceaa958f95c1a__20220906_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-fa96c6bd', 'hca_prod_fccd3f50cde247bf8972a293b5928aea__20220606_dcp2_20230314_dcp25'),
-    mksrc('bigquery', 'datarepo-289f5713', 'hca_prod_fde199d2a8414ed1aa65b9e0af8969b1__20220330_dcp2_20230314_dcp25'),
-    # @formatter:on
-]))
-
-dcp26_sources = mkdict(dcp25_sources, 334, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-37928b8f', 'hca_prod_2d4d89f2ebeb467cae60a3efc5e8d4ba__20230206_dcp2_20230331_dcp26'),
-    mksrc('bigquery', 'datarepo-96f28c63', 'hca_prod_615158205bb845d08d12f0850222ecf0__20221007_dcp2_20230314_dcp25', pop),  # noqa E501
-    mksrc('bigquery', 'datarepo-0d0c66d4', 'hca_prod_c281ab637b7d4bdfb7619b1baaa18f82__20230331_dcp2_20230331_dcp26'),
-    mksrc('bigquery', 'datarepo-b3b1e92f', 'hca_prod_c5b475f276b34a8e8465f3b69828fec3__20230331_dcp2_20230331_dcp26'),
-    mksrc('bigquery', 'datarepo-baa51c1d', 'hca_prod_cd9d6360ce38432197dff13c79e3cb84__20230206_dcp2_20230331_dcp26'),
-    mksrc('bigquery', 'datarepo-d0d87d10', 'hca_prod_e6773550c1a6494986431a3154cf2670__20221208_dcp2_20230314_dcp25', pop),  # noqa E501
-    mksrc('bigquery', 'datarepo-636717a5', 'hca_prod_f3825dfe990a431fb9719c26d39840db__20230331_dcp2_20230331_dcp26'),
-    # @formatter:on
-]))
-
-dcp27_sources = mkdict(dcp26_sources, 350, mkdelta([
-    mksrc('bigquery', 'datarepo-75b50ae7', 'hca_prod_04e4292cf62f4098ae9bfd69ae002a90__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-d2f36342', 'hca_prod_0751843070314bdfa3ce1bf0917a1923__20221208_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-26882f0f', 'hca_prod_07d5987e7f9e4f34b0fba185a35504f5__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-c89e2c59', 'hca_prod_1fac187b1c3f41c4b6b66a9a8c0489d1__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-87be288c', 'hca_prod_30dc396411354b56b393ce2dcbc6e379__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-c4141b50', 'hca_prod_3ce9ae94c469419a96375d138a4e642f__20230201_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-4064f470', 'hca_prod_40272c3b46974bd4ba3f82fa96b9bf71__20220303_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-94eb2b77', 'hca_prod_4627f43ea43f44dd8c4b7efddb3f296d__20230501_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-e3915cae', 'hca_prod_4f4f0193ede84a828cb07a0a22f06e63__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-a498b8c7', 'hca_prod_50154d1e230844bf960810c7afaa560b__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-f800a6a6', 'hca_prod_566d00b0e1f84b929cbd57de9fad0050__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-bf159952', 'hca_prod_5f44a860d96e4a99b67e24e1b8ccfd26__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-319b1c54', 'hca_prod_6e1771950ac0468b99a287de96dc9db4__20230503_dcp2_20230503_dcp27'),
-    mksrc('bigquery', 'datarepo-7eec55b7', 'hca_prod_77c13c40a5984036807fbe09209ec2dd__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-daba93c4', 'hca_prod_7c5990297a3c4b5c8e79e72c9a9a65fe__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-2431c03c', 'hca_prod_aa55000c016848d890262d3a76ec8af3__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-e2b50763', 'hca_prod_c844538b88544a95bd01aacbaf86d97f__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-37a0e19e', 'hca_prod_dc0b65b0771346f0a3390b03ea786046__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-cc43a54a', 'hca_prod_e090445c69714212bc5fae4ec3914102__20230427_dcp2_20230501_dcp27'),
-    mksrc('bigquery', 'datarepo-054ea5fa', 'hca_prod_f6133d2a9f3d4ef99c19c23d6c7e6cc0__20220119_dcp2_20230501_dcp27'),
-]))
-
-dcp28_sources = mkdict(dcp27_sources, 364, mkdelta([
-    mksrc('bigquery', 'datarepo-60acbcdf', 'hca_prod_111d272bc25a49ac9b25e062b70d66e0__20230530_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-7ef68d1b', 'hca_prod_272b760266cd4b02a86b2b7c9c51a9ea__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-db922a93', 'hca_prod_29b5416534ee4da5b257b4c1f7343656__20230530_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-6985b629', 'hca_prod_57a2c2deb0d4465abe53a41e59e75fab__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-131b24da', 'hca_prod_77423e580fbb495a9ec2bd9a8010f21d__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-6a3a84b5', 'hca_prod_842605c7375a47c59e2ca71c2c00fcad__20220117_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-e3af5c43', 'hca_prod_8b954fb2bccb44c584e39f91e9189c40__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-bb339f80', 'hca_prod_92afaa56d501481ea027dddd72212ba8__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-f4518d09', 'hca_prod_9746f4e0d3b2454389b310288162851b__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-0ea3a03c', 'hca_prod_a4f154f85cc940b5b8d7af90afce8a8f__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-a5f69eaf', 'hca_prod_ae62bb3155ca4127b0fbb1771a604645__20230313_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-72f08c60', 'hca_prod_c3354786c17c4e53b4d7c7afbed5b208__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-d2e866b8', 'hca_prod_cae461deecbd482fa5d411d607fc12ba__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-30f2f8b0', 'hca_prod_e6773550c1a6494986431a3154cf2670__20221208_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-c3fad823', 'hca_prod_e925633fabd9486a81c61a6a66891d23__20230526_dcp2_20230530_dcp28'),
-    mksrc('bigquery', 'datarepo-9176f1e6', 'hca_prod_fae72d894ac44aab9b93574775e168d4__20230530_dcp2_20230530_dcp28')
-]))
-
-dcp29_sources = mkdict(dcp28_sources, 386, mkdelta([
-    mksrc('bigquery', 'datarepo-a066b1d5', 'hca_prod_01aacb6840764fd99eb9aba0f48c1b5a__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-36015bab', 'hca_prod_0d737cce1c1c493a8e2eb00143bccc12__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-34ba456a', 'hca_prod_0efecd202b524e4f96c59b4b94158713__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-7ab77437', 'hca_prod_16e9915978bc44aab47955a5e903bf50__20221101_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-cac974a7', 'hca_prod_1c4cbdd433e34dedab435958de817123__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-e13ec32c', 'hca_prod_2973a42cf81048129a235bbc9644588d__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-780b7376', 'hca_prod_2caedc30c8164b99a237b9f3b458c8e5__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-27e5c5cf', 'hca_prod_2d559a6e7cd9432f9f6e0e4df03b0888__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-340d69da', 'hca_prod_3d49e5e5976f44cbb6b9079016c31c56__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-6c9af5df', 'hca_prod_457d0bfe79e443f1be5d83bf080d809e__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-8d3a1856', 'hca_prod_5a54c6170eed486e8c1a8a8041fc1729__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-3b876136', 'hca_prod_5f607e50ba224598b1e9f3d9d7a35dcc__20230201_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-4c3d24fa', 'hca_prod_615158205bb845d08d12f0850222ecf0__20221007_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-2eac2e30', 'hca_prod_65cbfea55c544255a1d014549a86a5c1__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-2987dba5', 'hca_prod_73011a86475548ac9f70a28903b4ad77__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-a0198d42', 'hca_prod_92892ab213344b1c976114f5a73548ea__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-3525d30e', 'hca_prod_9c20a245f2c043ae82c92232ec6b594f__20220212_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-a03549fc', 'hca_prod_9f17ed7d93254723a120b00e48db20c0__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-76e3e0fa', 'hca_prod_b208466a6fb043858cfb8e03ff6b939e__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-32c91a7f', 'hca_prod_b91c623b19454727b1670a93027b0d3f__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-41efd06c', 'hca_prod_bc5512cc95444aa48b758af445ee2257__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-3ed34ae5', 'hca_prod_cea413af79b34f118b48383fe9a65fbe__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-4d68a94d', 'hca_prod_da74b50760ee4dd1bd02807bb051a337__20230614_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-72424b3d', 'hca_prod_e5fe827437694d7daa356d33c226ab43__20230616_dcp2_20230616_dcp29'),
-    mksrc('bigquery', 'datarepo-0e1a9ef4', 'hca_prod_f0f89c1474604bab9d4222228a91f185__20220119_dcp2_20230616_dcp29'),
-]))
-
-dcp30_sources = mkdict(dcp29_sources, 391, mkdelta([
-    mksrc('bigquery', 'datarepo-664081d7', 'hca_prod_07073c1280064710a00b23abdb814904__20220107_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-b58dd635', 'hca_prod_1c6a960d52ac44eab728a59c7ab9dc8e__20220110_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-db2290d3', 'hca_prod_1cd1f41ff81a486ba05b66ec60f81dcf__20220107_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-bb040c00', 'hca_prod_1eba4d0b2d154ba7bb3cd4654dd94519__20230815_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-3c37eadf', 'hca_prod_23587fb31a4a4f58ad74cc9a4cb4c254__20220111_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-f259cc76', 'hca_prod_279f176633194e3c9f996fb59ba9b3e5__20230815_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-b5f40aa1', 'hca_prod_31887183a72c43089eacc6140313f39c__20220111_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-f1f04670', 'hca_prod_41fb1734a121461695c73b732c9433c7__20220113_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-1c267fa5', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-9ec63340', 'hca_prod_51f02950ee254f4b8d0759aa99bb3498__20220117_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-1188524b', 'hca_prod_520afa10f9d24e93ab7a26c4c863ce18__20220117_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-c35a61eb', 'hca_prod_559bb888782941f2ace52c05c7eb81e9__20220117_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-1840929b', 'hca_prod_7027adc6c9c946f384ee9badc3a4f53b__20220117_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-019a64bb', 'hca_prod_739ef78aba5d4487a0139982db66d222__20230815_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-e0eccf2a', 'hca_prod_74493e9844fc48b0a58fcc7e77268b59__20220117_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-ccee34ca', 'hca_prod_783c9952a4ae4106a6ce56f20ce27f88__20220117_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-5dd80f6c', 'hca_prod_8f1f653d3ea14d8eb4a7b97dc852c2b1__20230815_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-6f3c6cc2', 'hca_prod_92afaa56d501481ea027dddd72212ba8__20230526_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-41c736b4', 'hca_prod_996120f9e84f409fa01e732ab58ca8b9__20220118_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-bdbe74eb', 'hca_prod_b208466a6fb043858cfb8e03ff6b939e__20230616_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-20f3401b', 'hca_prod_b4a7d12f6c2f40a39e359756997857e3__20220118_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-47aed999', 'hca_prod_c893cb575c9f4f26931221b85be84313__20220118_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-e7b395be', 'hca_prod_ccef38d7aa9240109621c4c7b1182647__20220118_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-b11d40e9', 'hca_prod_d3a4ceac4d66498497042570c0647a56__20220119_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-c47b01c5', 'hca_prod_d8ae869c39c24cddb3fc2d0d8f60e7b8__20230313_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-a2b3ca2a', 'hca_prod_efea6426510a4b609a19277e52bfa815__20220118_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-6f4f0e4f', 'hca_prod_f7b464770f2a4bffa9b7719e000499a3__20230815_dcp2_20230815_dcp30'),
-    mksrc('bigquery', 'datarepo-80208d02', 'hca_prod_f86f1ab41fbb4510ae353ffd752d4dfc__20220119_dcp2_20230815_dcp30'),
-]))
-
-dcp31_sources = mkdict(dcp30_sources, 399, mkdelta([
-    mksrc('bigquery', 'datarepo-36295e0b', 'hca_prod_0911cc0406d64ffc8318b90b0039e8ad__20230905_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-83dafa1a', 'hca_prod_279f176633194e3c9f996fb59ba9b3e5__20230815_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-789ec382', 'hca_prod_326b36bd0975475f983b56ddb8f73a4d__20230905_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-b68fee1b', 'hca_prod_3e92c74d256c40cd927316f155da8342__20220729_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-d8916247', 'hca_prod_453d7ee2319f496c986299d397870b63__20230905_dcp2_20230906_dcp31'),
-    mksrc('bigquery', 'datarepo-7ad0a304', 'hca_prod_4ef86852aca04a9185229968e0e54dbe__20230313_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-18691416', 'hca_prod_51f02950ee254f4b8d0759aa99bb3498__20220117_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-d6c0be70', 'hca_prod_577c946d6de54b55a854cd3fde40bff2__20220117_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-3352a319', 'hca_prod_6936da41369246bbbca1cd0f507991e9__20230905_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-ff7365d6', 'hca_prod_739ef78aba5d4487a0139982db66d222__20230815_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-517878d7', 'hca_prod_7dcffc327c8243969a4f88b5579bfe8a__20230905_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-088b9165', 'hca_prod_7f9766ffbb124279b34078d140bdd7ba__20230905_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-4450b12b', 'hca_prod_92892ab213344b1c976114f5a73548ea__20230616_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-f305a966', 'hca_prod_9746f4e0d3b2454389b310288162851b__20230526_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-6a51c34a', 'hca_prod_e374c1cf73fd4a7a866979dc41714984__20230905_dcp2_20230905_dcp31'),
-    mksrc('bigquery', 'datarepo-2abbf49d', 'hca_prod_e456c042f6b64ceca3381a8ef80bd779__20230905_dcp2_20230905_dcp31'),
-]))
-
-dcp32_sources = mkdict(dcp31_sources, 405, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-6885133e', 'hca_prod_0792db3480474e62802c9177c9cd8e28__20220107_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-03b554f8', 'hca_prod_0911cc0406d64ffc8318b90b0039e8ad__20230905_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-a5249352', 'hca_prod_279f176633194e3c9f996fb59ba9b3e5__20230815_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-e676a270', 'hca_prod_2a72a4e566b2405abb7c1e463e8febb0__20220111_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-720eb4d9', 'hca_prod_31887183a72c43089eacc6140313f39c__20220111_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-8c4f04c0', 'hca_prod_326b36bd0975475f983b56ddb8f73a4d__20230905_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-9f296da3', 'hca_prod_376a7f55b8764f609cf3ed7bc83d5415__20220111_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-223a067e', 'hca_prod_3e92c74d256c40cd927316f155da8342__20220729_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-f053f0b1', 'hca_prod_421bc6cdbbb44398ac60a32ea94f02ae__20230929_dcp2_20231003_dcp32'),
-    mksrc('bigquery', 'datarepo-7221e50b', 'hca_prod_453d7ee2319f496c986299d397870b63__20230905_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-262093aa', 'hca_prod_48f60534ba4e45bcaa5b6d3a6c45962e__20230929_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-129b6bcc', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-85e28021', 'hca_prod_4ef86852aca04a9185229968e0e54dbe__20230313_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-ad300086', 'hca_prod_50154d1e230844bf960810c7afaa560b__20230427_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-6d4f90e5', 'hca_prod_51f02950ee254f4b8d0759aa99bb3498__20220117_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-d13e36e7', 'hca_prod_53c53cd481274e12bc7f8fe1610a715c__20220117_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-0287a0ba', 'hca_prod_577c946d6de54b55a854cd3fde40bff2__20220117_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-4d2eebce', 'hca_prod_5a54c6170eed486e8c1a8a8041fc1729__20230616_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-2917ceb6', 'hca_prod_5b3285614a9740acb7ad6a90fc59d374__20220117_dcp2_20230314_dcp25', pop),  # noqa E501
-    mksrc('bigquery', 'datarepo-069ac8d2', 'hca_prod_67a3de0945b949c3a068ff4665daa50e__20220117_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-7be1db07', 'hca_prod_6936da41369246bbbca1cd0f507991e9__20230905_dcp2_20231003_dcp32'),
-    mksrc('bigquery', 'datarepo-b59acd40', 'hca_prod_72ff481856924bbc8886e47763531023__20230929_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-a2f56dc5', 'hca_prod_739ef78aba5d4487a0139982db66d222__20230815_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-114ecc76', 'hca_prod_74e2ef9d7c9f418cb2817fb38f3b1571__20220906_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-87452786', 'hca_prod_7dcffc327c8243969a4f88b5579bfe8a__20230905_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-c844e919', 'hca_prod_bfaedc29fe844e72a46175dc8aabbd1b__20230929_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-46a39a89', 'hca_prod_c412be53cf9547c7980cc0a0caa2d3a0__20230929_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-89205d73', 'hca_prod_cfece4d2f18d44ada46a42bbcb5cb3b7__20230929_dcp2_20231002_dcp32'),
-    mksrc('bigquery', 'datarepo-48284a59', 'hca_prod_dcc28fb37bab48cebc4b684c00e133ce__20230905_dcp2_20231002_dcp32'),
-    # @formatter:on
-]))
-
-dcp33_sources = mkdict(dcp32_sources, 412, mkdelta([
-    mksrc('bigquery', 'datarepo-fe1f8660', 'hca_prod_0d737cce1c1c493a8e2eb00143bccc12__20230616_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-46bbfa8a', 'hca_prod_16e9915978bc44aab47955a5e903bf50__20221101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-d5d4057a', 'hca_prod_1c5eaabf075b4b7aa9e607792c2034b3__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-b64e953d', 'hca_prod_1ffa222328a64133a5a4badd00faf4bc__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-ca76b840', 'hca_prod_21ea8ddb525f4f1fa82031f0360399a2__20220111_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-a0ffb40f', 'hca_prod_2af52a1365cb4973b51339be38f2df3f__20220111_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-526f3da5', 'hca_prod_3d49e5e5976f44cbb6b9079016c31c56__20230614_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-766c30b1', 'hca_prod_4ef86852aca04a9185229968e0e54dbe__20230313_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-08e00b61', 'hca_prod_5bd01deb01ee46118efdcf0ec5f56ac4__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-70ee98ab', 'hca_prod_645b20c95ed0450086b57aef770d010a__20230929_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-2e06a188', 'hca_prod_73011a86475548ac9f70a28903b4ad77__20230616_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-76a818d4', 'hca_prod_7f9766ffbb124279b34078d140bdd7ba__20230905_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-2bb1dd84', 'hca_prod_849ed38c591743c4a8f90782241cf10c__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-11a44864', 'hca_prod_8a666b76daaf4b1f9414e4807a1d1e8b__20220630_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-4bc03f16', 'hca_prod_91674dcf864140e6978dc1706feffba8__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-0b11f34c', 'hca_prod_94023a08611d4f22a8c990956e091b2e__20220118_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-bc7bde81', 'hca_prod_95f07e6e6a734e1ba880c83996b3aa5c__20220118_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-69bbc337', 'hca_prod_9c20a245f2c043ae82c92232ec6b594f__20220212_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-55e4f61e', 'hca_prod_9f17ed7d93254723a120b00e48db20c0__20230614_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-7f36ef82', 'hca_prod_cc35f94ee93b4dbda08c702978d9046f__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-3ad8245f', 'hca_prod_da74b50760ee4dd1bd02807bb051a337__20230614_dcp2_20231102_dcp33'),
-
-dcp33_sources = mkdict(dcp32_sources, 412, mkdelta([
-    mksrc('bigquery', 'datarepo-fe1f8660', 'hca_prod_0d737cce1c1c493a8e2eb00143bccc12__20230616_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-46bbfa8a', 'hca_prod_16e9915978bc44aab47955a5e903bf50__20221101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-d5d4057a', 'hca_prod_1c5eaabf075b4b7aa9e607792c2034b3__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-b64e953d', 'hca_prod_1ffa222328a64133a5a4badd00faf4bc__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-ca76b840', 'hca_prod_21ea8ddb525f4f1fa82031f0360399a2__20220111_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-a0ffb40f', 'hca_prod_2af52a1365cb4973b51339be38f2df3f__20220111_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-526f3da5', 'hca_prod_3d49e5e5976f44cbb6b9079016c31c56__20230614_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-766c30b1', 'hca_prod_4ef86852aca04a9185229968e0e54dbe__20230313_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-08e00b61', 'hca_prod_5bd01deb01ee46118efdcf0ec5f56ac4__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-70ee98ab', 'hca_prod_645b20c95ed0450086b57aef770d010a__20230929_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-2e06a188', 'hca_prod_73011a86475548ac9f70a28903b4ad77__20230616_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-76a818d4', 'hca_prod_7f9766ffbb124279b34078d140bdd7ba__20230905_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-2bb1dd84', 'hca_prod_849ed38c591743c4a8f90782241cf10c__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-11a44864', 'hca_prod_8a666b76daaf4b1f9414e4807a1d1e8b__20220630_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-4bc03f16', 'hca_prod_91674dcf864140e6978dc1706feffba8__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-0b11f34c', 'hca_prod_94023a08611d4f22a8c990956e091b2e__20220118_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-bc7bde81', 'hca_prod_95f07e6e6a734e1ba880c83996b3aa5c__20220118_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-69bbc337', 'hca_prod_9c20a245f2c043ae82c92232ec6b594f__20220212_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-55e4f61e', 'hca_prod_9f17ed7d93254723a120b00e48db20c0__20230614_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-7f36ef82', 'hca_prod_cc35f94ee93b4dbda08c702978d9046f__20231101_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-3ad8245f', 'hca_prod_da74b50760ee4dd1bd02807bb051a337__20230614_dcp2_20231102_dcp33'),
-    mksrc('bigquery', 'datarepo-5f867d37', 'hca_prod_dbd836cfbfc241f0983441cc6c0b235a__20220212_dcp2_20231102_dcp33')
-]))
-
-dcp34_sources = mkdict(dcp33_sources, 427, mkdelta([
-    mksrc('bigquery', 'datarepo-a2f2ced7', 'hca_prod_08fb10df32e5456c9882e33fcd49077a__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-cd738b8d', 'hca_prod_10a845f7036146fa92a32a36483136b1__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-6fec2801', 'hca_prod_1538d572bcb7426b8d2c84f3a7f87bb0__20220630_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-05ece841', 'hca_prod_1dd552a5eb4f4b9280887224bcbd0629__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-1d8f9fa4', 'hca_prod_2184e63d82d84ab2839ee93f8395f568__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-64f7ba3d', 'hca_prod_222a92d5277b489caad8a680d1fd2b12__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-5f2ddddc', 'hca_prod_272b760266cd4b02a86b2b7c9c51a9ea__20230526_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-3dbccc52', 'hca_prod_2f67614380c24bc6b7b42613fe0fadf0__20220111_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-078dbc55', 'hca_prod_34c9a62ca6104e31b3438fb7be676f8c__20221101_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-a0115d6e', 'hca_prod_3cfcdff5dee14a7ba591c09c6e850b11__20220112_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-66dbe882', 'hca_prod_415eb773cadb43d1ab897d160d5cfc7d__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-37f63790', 'hca_prod_58028aa80ed249cab60f15e2ed5989d5__20220117_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-9f64fc88', 'hca_prod_581de139461f4875b40856453a9082c7__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-78a292c4', 'hca_prod_5b910a437fb54ea7b9d643dbd1bf2776__20220729_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-84d96baf', 'hca_prod_65cbfea55c544255a1d014549a86a5c1__20230616_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-96cc1349', 'hca_prod_6735ff731a04422eb500730202e46f8a__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-e8198e31', 'hca_prod_6874b7eb344547ec877375141430e169__20231213_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-43f096b1', 'hca_prod_77dedd59137648879bcadc42b56d5b7a__20230201_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-594e57c3', 'hca_prod_78b2406dbff246fc8b6120690e602227__20220117_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-13c1e76b', 'hca_prod_8185730f411340d39cc3929271784c2b__20220117_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-10cff382', 'hca_prod_849ed38c591743c4a8f90782241cf10c__20231101_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-77cc3acc', 'hca_prod_8559a8ed5d8c4fb6bde8ab639cebf03c__20220118_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-af50b124', 'hca_prod_85c0d6faf1174d76b01a5d5e8f5f9188__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-40731b27', 'hca_prod_894ae6ac5b4841a8a72f315a9b60a62e__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-e7931a4c', 'hca_prod_925f9a4ccac0444aad2c612656ab3a85__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-96344c2b', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-7f53a7f2', 'hca_prod_abe1a013af7a45ed8c26f3793c24a1f4__20220118_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-1e2e46c4', 'hca_prod_bfaedc29fe844e72a46175dc8aabbd1b__20230929_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-2901e79b', 'hca_prod_c05184453b3b49c6b8fcc41daa4eacba__20220213_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-5b1e55df', 'hca_prod_c16a754f5da346ed8c1e6426af2ef625__20220519_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-94d17e05', 'hca_prod_c1a9a93dd9de4e659619a9cec1052eaa__20220118_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-d12842d9', 'hca_prod_c4077b3c5c984d26a614246d12c2e5d7__20220118_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-abaa9315', 'hca_prod_c844538b88544a95bd01aacbaf86d97f__20230427_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-02709814', 'hca_prod_cbd3d2769f244af98381b11f6cdbdc4b__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-7da203ad', 'hca_prod_cfece4d2f18d44ada46a42bbcb5cb3b7__20230929_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-45fc3b21', 'hca_prod_da77bd0643ae4012a774e4d62797df51__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-0bc51bfc', 'hca_prod_daf9d9827ce643f6ab51272577290606__20220119_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-8ecfd261', 'hca_prod_e49e556ada5a442ab45c8691b457623e__20231212_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-99284b34', 'hca_prod_e526d91dcf3a44cb80c5fd7676b55a1d__20220119_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-88582dc4', 'hca_prod_ede2e0b46652464fabbc0b2d964a25a0__20220118_dcp2_20231213_dcp34'),
-    mksrc('bigquery', 'datarepo-145a904d', 'hca_prod_ef1e3497515e4bbe8d4c10161854b699__20220118_dcp2_20231213_dcp34')
-]))
-
-dcp35_sources = mkdict(dcp34_sources, 438, mkdelta([
-    mksrc('bigquery', 'datarepo-3b981d26', 'hca_prod_17cf943be247454f908bda58665fcc56__20240201_dcp2_20240206_dcp35'),
-    mksrc('bigquery', 'datarepo-2a225323', 'hca_prod_1dddae6e375348afb20efa22abad125d__20220213_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-a318416f', 'hca_prod_27e2e0ae59714927aac119e81804097b__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-3403e1a6', 'hca_prod_41fb1734a121461695c73b732c9433c7__20220113_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-f2e5bb83', 'hca_prod_4bec484dca7a47b48d488830e06ad6db__20220113_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-d9f05210', 'hca_prod_4f4f0193ede84a828cb07a0a22f06e63__20230427_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-ea22560e', 'hca_prod_6735ff731a04422eb500730202e46f8a__20231212_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-33c2177f', 'hca_prod_77780d5603c0481faade2038490cef9f__20220330_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-a8a3410a', 'hca_prod_7a8d45f1353b45088e8965a96785b167__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-d9783a5a', 'hca_prod_7bc1f14b5e644c7f86b023596b97e2aa__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-011b06f4', 'hca_prod_894ae6ac5b4841a8a72f315a9b60a62e__20231212_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-9a5c3a4a', 'hca_prod_896f377c8e88463e82b0b2a5409d6fe4__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-aef76795', 'hca_prod_902dc0437091445c9442d72e163b9879__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-dea71195', 'hca_prod_95f07e6e6a734e1ba880c83996b3aa5c__20220118_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-33ff6c5e', 'hca_prod_9a23ac2d93dd4bac9bb8040e6426db9d__20220906_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-060b0c69', 'hca_prod_a2a2f324cf24409ea859deaee871269c__20220330_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-5e526b40', 'hca_prod_aebc99a33151482a9709da6802617763__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-84148b68', 'hca_prod_aecfd908674c4d4eb36e0c1ceab02245__20231101_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-a31095ba', 'hca_prod_aff9c3cd6b844fc2abf2b9c0b3038277__20220330_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-9a93e11b', 'hca_prod_c302fe54d22d451fa130e24df3d6afca__20220606_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-6db90b39', 'hca_prod_c4077b3c5c984d26a614246d12c2e5d7__20220118_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-cc96a15f', 'hca_prod_c6ef0270eafc43bd8097c10020a03cfc__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-258d3043', 'hca_prod_c9e83418a9f04ed1ab4f56d9513417bf__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-faa23f13', 'hca_prod_e1fda2177ee14c1aadfa648279dafac6__20240201_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-87eefe3c', 'hca_prod_e255b1c611434fa683a8528f15b41038__20220330_dcp2_20240202_dcp35'),
-    mksrc('bigquery', 'datarepo-36bcfc7a', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20240202_dcp35')
-]))
-
-dcp36_sources = mkdict(dcp35_sources, 441, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-e650c603', 'hca_prod_07d5987e7f9e4f34b0fba185a35504f5__20230427_dcp2_20240301_dcp36'),
-    mksrc('bigquery', 'datarepo-bac72cd7', 'hca_prod_116965f3f09447699d28ae675c1b569c__20220107_dcp2_20240301_dcp36'),
-    mksrc('bigquery', 'datarepo-4c496b01', 'hca_prod_28dd14388f4040d08e53ee3301b66218__20240301_dcp2_20240306_dcp36'),
-    mksrc('bigquery', 'datarepo-314aac18', 'hca_prod_377c35d193bf470c806708f954b269bd__20240301_dcp2_20240301_dcp36'),
-    mksrc('bigquery', 'datarepo-9f97463d', 'hca_prod_87f519b4886241f9acff75e823e0e430__20240301_dcp2_20240301_dcp36'),
-    mksrc('bigquery', 'datarepo-383230bf', 'hca_prod_9483c664d5464b309ba3efbdbf9290b4__20240301_dcp2_20240301_dcp36'),
-    mksrc('bigquery', 'datarepo-f0643a05', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20240301_dcp36'),
-    mksrc('bigquery', 'datarepo-72f08c60', 'hca_prod_c3354786c17c4e53b4d7c7afbed5b208__20230526_dcp2_20230530_dcp28', pop), # noqa E501
-    mksrc('bigquery', 'datarepo-91076846', 'hca_prod_e090445c69714212bc5fae4ec3914102__20230427_dcp2_20240301_dcp36')
-    # @formatter:on
-]))
-
-dcp37_sources = mkdict(dcp36_sources, 450, mkdelta([
-    mksrc('bigquery', 'datarepo-e57afe2a', 'hca_prod_2079bb2e676e4bbf8c68f9c6459edcbb__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-a37f1015', 'hca_prod_46a7e4bf04744a8f8d1843afcde90491__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-3bb4aecc', 'hca_prod_4bcc16b57a4745bbb9c0be9d5336df2d__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-cad8e8e7', 'hca_prod_581de139461f4875b40856453a9082c7__20231212_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-27a37706', 'hca_prod_60109425a6e64be1a3bc15de680317d4__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-03e157f1', 'hca_prod_6836c1e4906b4c34a11ccb025167896d__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-a1e5fe66', 'hca_prod_69324a96a68a4514bbb4f8f3ea4bd0f1__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-f9215b2b', 'hca_prod_750b455ae3cf472195818609a6c9d561__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-d8e57f88', 'hca_prod_86fe0a0c88b34a3e94a16f9feadc401e__20240327_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-f0498b78', 'hca_prod_902dc0437091445c9442d72e163b9879__20240201_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-28635bac', 'hca_prod_aebc99a33151482a9709da6802617763__20240201_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-6ac05956', 'hca_prod_c05184453b3b49c6b8fcc41daa4eacba__20220213_dcp2_20240328_dcp37'),
-    mksrc('bigquery', 'datarepo-86633e77', 'hca_prod_c0fecf0baf8641b8ba82d5fd81b7542a__20240301_dcp2_20240328_dcp37')
-]))
-
-dcp38_sources = mkdict(dcp37_sources, 455, mkdelta([
-    mksrc('bigquery', 'datarepo-316d4b45', 'hca_prod_1662accf0e0c48c493145aba063f2220__20240503_dcp2_20240508_dcp38'),
-    mksrc('bigquery', 'datarepo-126c9c22', 'hca_prod_bcdf233f92464c0c98430514120b7e3a__20240503_dcp2_20240508_dcp38'),
-    mksrc('bigquery', 'datarepo-cc6b2b4f', 'hca_prod_c05184453b3b49c6b8fcc41daa4eacba__20220213_dcp2_20240508_dcp38'),
-    mksrc('bigquery', 'datarepo-5292bdb6', 'hca_prod_ccc3b7861da0427fa45f76306d6143b6__20240503_dcp2_20240508_dcp38'),
-    mksrc('bigquery', 'datarepo-37460143', 'hca_prod_d5c91e922e7f473d8cf3ab03bbae21c2__20240503_dcp2_20240508_dcp38'),
-    mksrc('bigquery', 'datarepo-39884574', 'hca_prod_daef3fda262045aea3f71613814a35bf__20240503_dcp2_20240508_dcp38')
-]))
-
-dcp39_sources = mkdict(dcp38_sources, 455, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-31abbcbe', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20240603_dcp39'),
-    mksrc('bigquery', 'datarepo-664a24cb', 'hca_prod_7c75f07c608d4c4aa1b7b13d11c0ad31__20220117_dcp2_20230314_dcp25', pop), # noqa E501
-    mksrc('bigquery', 'datarepo-cd6f5afa', 'hca_prod_838d46603d624b08b32ddc5cbd93919d__20240531_dcp2_20240603_dcp39'),
-    mksrc('bigquery', 'datarepo-f6c258a6', 'hca_prod_9483c664d5464b309ba3efbdbf9290b4__20240301_dcp2_20240604_dcp39'),
-    mksrc('bigquery', 'datarepo-cf29bb39', 'hca_prod_f2078d5f2e7d48448552f7c41a231e52__20230201_dcp2_20240603_dcp39')
-    # @formatter:on
-]))
-
-dcp40_sources = mkdict(dcp39_sources, 458, mkdelta([
-    mksrc('bigquery', 'datarepo-7ff6ae27', 'hca_prod_005d611a14d54fbf846e571a1f874f70__20220111_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-083a593d', 'hca_prod_027c51c60719469fa7f5640fe57cbece__20220110_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-6e878a15', 'hca_prod_065e6c13ad6b46a38075c3137eb03068__20220213_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-d001eadd', 'hca_prod_102018327c7340339b653ef13d81656a__20220213_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-56a4f662', 'hca_prod_135f7f5c4a854bcf9f7c4f035ff1e428__20220729_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-b081c1a1', 'hca_prod_1538d572bcb7426b8d2c84f3a7f87bb0__20220630_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-0c56d5cc', 'hca_prod_16dc40f92c1342e38cdf251e95bfc043__20240708_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-2f17d9dd', 'hca_prod_16ed4ad8731946b288596fe1c1d73a82__20220111_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-98b77527', 'hca_prod_1c6a960d52ac44eab728a59c7ab9dc8e__20220110_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-8c31fd19', 'hca_prod_2d4d89f2ebeb467cae60a3efc5e8d4ba__20230206_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-5feaa5ad', 'hca_prod_31887183a72c43089eacc6140313f39c__20220111_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-c094bcbc', 'hca_prod_40272c3b46974bd4ba3f82fa96b9bf71__20220303_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-d72f8298', 'hca_prod_425c2759db664c93a358a562c069b1f1__20220519_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-496892e7', 'hca_prod_4a95101c9ffc4f30a809f04518a23803__20220113_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-1f66dc6c', 'hca_prod_4bec484dca7a47b48d488830e06ad6db__20220113_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-3b468668', 'hca_prod_4d6f6c962a8343d88fe10f53bffd4674__20220113_dcp2_20240712_dcp40'),
-    mksrc('bigquery', 'datarepo-03fca13b', 'hca_prod_50151324f3ed435898afec352a940a61__20220113_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-bfba7263', 'hca_prod_51f02950ee254f4b8d0759aa99bb3498__20220117_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-af6e91dc', 'hca_prod_577c946d6de54b55a854cd3fde40bff2__20220117_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-1a5200cb', 'hca_prod_86fd2521c5014e41841c06d79277bb7c__20240708_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-436c5a47', 'hca_prod_99101928d9b14aafb759e97958ac7403__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-e10ecf5f', 'hca_prod_a83b7f45bfb14c6a97e98e3370065cc1__20240708_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-028b06ac', 'hca_prod_ad04c8e79b7d4cceb8e901e31da10b94__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-7c60076d', 'hca_prod_ae71be1dddd84feb9bed24c3ddb6e1ad__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-27cbfba4', 'hca_prod_b963bd4b4bc14404842569d74bc636b8__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-7345f02d', 'hca_prod_c16a754f5da346ed8c1e6426af2ef625__20220519_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-ed0b32a2', 'hca_prod_c1a9a93dd9de4e659619a9cec1052eaa__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-9e3eace2', 'hca_prod_c211fd49d9804ba18c6ac24254a3cb52__20220303_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-4db5785d', 'hca_prod_c4077b3c5c984d26a614246d12c2e5d7__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-325d0681', 'hca_prod_c5ca43aa3b2b42168eb3f57adcbc99a1__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-2e8307b5', 'hca_prod_c6ad8f9bd26a4811b2ba93d487978446__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-812cbdeb', 'hca_prod_cddab57b68684be4806f395ed9dd635a__20220118_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-d8cb1e24', 'hca_prod_d3446f0c30f34a12b7c36af877c7bb2d__20220119_dcp2_20240711_dcp40'),
-    mksrc('bigquery', 'datarepo-bde87024', 'hca_prod_dc0b65b0771346f0a3390b03ea786046__20230427_dcp2_20240711_dcp40')
-]))
-
-dcp41_sources = mkdict(dcp40_sources, 462, mkdelta([
-    mksrc('bigquery', 'datarepo-ed01025c', 'hca_prod_0cc58d0b17344e1d9113b32e52f75e36__20240531_dcp2_20240604_dcp39'),
-    mksrc('bigquery', 'datarepo-50b00aaf', 'hca_prod_2079bb2e676e4bbf8c68f9c6459edcbb__20240327_dcp2_20240807_dcp41'),
-    mksrc('bigquery', 'datarepo-32cb91ae', 'hca_prod_4bcc16b57a4745bbb9c0be9d5336df2d__20240327_dcp2_20240807_dcp41'),
-    mksrc('bigquery', 'datarepo-17cfd151', 'hca_prod_660fc8b58fb840508c57e6313195bc81__20240806_dcp2_20240807_dcp41'),
-    mksrc('bigquery', 'datarepo-cc9e8ac9', 'hca_prod_815c5ef50fb14eb798821d160362468e__20240806_dcp2_20240807_dcp41'),
-    mksrc('bigquery', 'datarepo-c2886bdd', 'hca_prod_838d46603d624b08b32ddc5cbd93919d__20240531_dcp2_20240807_dcp41'),
-    mksrc('bigquery', 'datarepo-832dbfa1', 'hca_prod_c16a754f5da346ed8c1e6426af2ef625__20220519_dcp2_20240807_dcp41'),
-    mksrc('bigquery', 'datarepo-65bb12f3', 'hca_prod_e870ab5635374b6da66f534fbf8cc57f__20240806_dcp2_20240807_dcp41')
-]))
-
-dcp42_sources = mkdict(dcp41_sources, 470, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-db22b6c5', 'hca_prod_19037ec943a74823b93f9e59c694d17e__20240903_dcp2_20240904_dcp42'),
-    mksrc('bigquery', 'datarepo-8e43554a', 'hca_prod_35d5b0573daf4ccd8112196194598893__20240903_dcp2_20240905_dcp42'),
-    mksrc('bigquery', 'datarepo-5b6ac433', 'hca_prod_5f1a1aee6c484dd4a2c4eb4ca6aadf74__20240903_dcp2_20240904_dcp42'),
-    mksrc('bigquery', 'datarepo-d5e4c41e', 'hca_prod_7c75f07c608d4c4aa1b7b13d11c0ad31__20220117_dcp2_20240904_dcp42'),
-    mksrc('bigquery', 'datarepo-eb6182b7', 'hca_prod_888f17664c8443bb8717b5f9d2046097__20240903_dcp2_20240904_dcp42'),
-    mksrc('bigquery', 'datarepo-b9e1d9ec', 'hca_prod_9dd91b6e7c6249d3a3d474f603deffdb__20240903_dcp2_20240904_dcp42'),
-    mksrc('bigquery', 'datarepo-582bf509', 'hca_prod_b176d75662d8493383a48b026380262f__20240903_dcp2_20240904_dcp42'),
-    mksrc('bigquery', 'datarepo-c85d293d', 'hca_prod_f598aee0d269403690e9d6d5b1c84429__20240903_dcp2_20240904_dcp42')
-    # @formatter:on
-]))
-
-dcp43_sources = mkdict(dcp42_sources, 475, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-ac7cee91', 'hca_prod_087efc3c26014de6bbe90114593050d1__20241004_dcp2_20241007_dcp43'),
-    mksrc('bigquery', 'datarepo-65c49269', 'hca_prod_2ef3655a973d4d699b4121fa4041eed7__20220111_dcp2_20241004_dcp43'),
-    mksrc('bigquery', 'datarepo-456691e5', 'hca_prod_3627473eb6d645c987b5b9f12ce57a10__20241004_dcp2_20241007_dcp43'),
-    mksrc('bigquery', 'datarepo-c577eed5', 'hca_prod_7f351a4cd24c4fcd9040f79071b097d0__20220906_dcp2_20241004_dcp43'),
-    mksrc('bigquery', 'datarepo-1dbd3c50', 'hca_prod_ae9f439bbd474d6ebd7232dc70b35d97__20241004_dcp2_20241004_dcp43'),
-    mksrc('bigquery', 'datarepo-21d1f89b', 'hca_prod_b39381584e8d4fdb9e139e94270dde16__20241004_dcp2_20241004_dcp43'),
-    mksrc('bigquery', 'datarepo-550c8f98', 'hca_prod_c3dd819dabab4957b20988f1e0900368__20241004_dcp2_20241004_dcp43'),
-    mksrc('bigquery', 'datarepo-06a00830', 'hca_prod_c5ca43aa3b2b42168eb3f57adcbc99a1__20220118_dcp2_20241004_dcp43')
-    # @formatter:on
-]))
-
-dcp44_sources = mkdict(dcp43_sources, 478, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-a4cd7c7b', 'hca_prod_027c51c60719469fa7f5640fe57cbece__20220110_dcp2_20241112_dcp44'),
-    mksrc('bigquery', 'datarepo-f491f2ec', 'hca_prod_065e6c13ad6b46a38075c3137eb03068__20220213_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-c107c54b', 'hca_prod_07d5987e7f9e4f34b0fba185a35504f5__20230427_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-1544f53f', 'hca_prod_08fb10df32e5456c9882e33fcd49077a__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-285e7eaf', 'hca_prod_0cc58d0b17344e1d9113b32e52f75e36__20240531_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-7d484792', 'hca_prod_102018327c7340339b653ef13d81656a__20220213_dcp2_20241115_dcp44'),
-    mksrc('bigquery', 'datarepo-a6b52720', 'hca_prod_10a845f7036146fa92a32a36483136b1__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-5d2b40af', 'hca_prod_111d272bc25a49ac9b25e062b70d66e0__20230530_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-7f01a691', 'hca_prod_12f320548f184dae8959bfce7e3108e7__20230201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-9312f848', 'hca_prod_135f7f5c4a854bcf9f7c4f035ff1e428__20220729_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-92a43732', 'hca_prod_1538d572bcb7426b8d2c84f3a7f87bb0__20220630_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-285944ef', 'hca_prod_17cf943be247454f908bda58665fcc56__20240201_dcp2_20241115_dcp44'),
-    mksrc('bigquery', 'datarepo-acd73a95', 'hca_prod_1c4cbdd433e34dedab435958de817123__20230614_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-81029e95', 'hca_prod_1c6a960d52ac44eab728a59c7ab9dc8e__20220110_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-9966e606', 'hca_prod_1dd552a5eb4f4b9280887224bcbd0629__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-6bb2f7dc', 'hca_prod_1dddae6e375348afb20efa22abad125d__20220213_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-052b303a', 'hca_prod_1eba4d0b2d154ba7bb3cd4654dd94519__20230815_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-9817de52', 'hca_prod_2184e63d82d84ab2839ee93f8395f568__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-e03d3225', 'hca_prod_222a92d5277b489caad8a680d1fd2b12__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-d5cbd84c', 'hca_prod_235092021e3c49598a459c5b642a1066__20230313_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-6529cd88', 'hca_prod_24d0dbbc54eb49048141934d26f1c936__20220303_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-7f2e151b', 'hca_prod_27e2e0ae59714927aac119e81804097b__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-1bbc6412', 'hca_prod_28dd14388f4040d08e53ee3301b66218__20240301_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-65288d14', 'hca_prod_29b5416534ee4da5b257b4c1f7343656__20230530_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-df397f52', 'hca_prod_2caedc30c8164b99a237b9f3b458c8e5__20230614_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-52af9737', 'hca_prod_2d4d89f2ebeb467cae60a3efc5e8d4ba__20230206_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-5b3c92c1', 'hca_prod_2ef3655a973d4d699b4121fa4041eed7__20220111_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-bdf9501b', 'hca_prod_2fe3c60bac1a4c619b59f6556c0fce63__20220606_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-e1452566', 'hca_prod_3089d311f9ed44ddbb10397059bad4dc__20220111_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-e8d32f19', 'hca_prod_30dc396411354b56b393ce2dcbc6e379__20230427_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-bfcbd18f', 'hca_prod_31887183a72c43089eacc6140313f39c__20220111_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-a26a6e31', 'hca_prod_3373e59c525f4a838c9cd8b280454697__20241104_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-c6ed9e98', 'hca_prod_35d5b0573daf4ccd8112196194598893__20240903_dcp2_20241118_dcp44'),
-    mksrc('bigquery', 'datarepo-dc8f8d03', 'hca_prod_377c35d193bf470c806708f954b269bd__20240301_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-23475197', 'hca_prod_3ce9ae94c469419a96375d138a4e642f__20230201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-7479ecb2', 'hca_prod_40272c3b46974bd4ba3f82fa96b9bf71__20220303_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-2d8292ac', 'hca_prod_415eb773cadb43d1ab897d160d5cfc7d__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-f6668a9c', 'hca_prod_425c2759db664c93a358a562c069b1f1__20220519_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-6c0c74f9', 'hca_prod_453d7ee2319f496c986299d397870b63__20230905_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-a2e83020', 'hca_prod_457d0bfe79e443f1be5d83bf080d809e__20230616_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-46ccc1c1', 'hca_prod_46a7e4bf04744a8f8d1843afcde90491__20240327_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-a1a1c788', 'hca_prod_4bec484dca7a47b48d488830e06ad6db__20220113_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-64d19b39', 'hca_prod_4c8e9d75d85a47de959806549cf44b91__20241104_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-3b02f77f', 'hca_prod_4d6f6c962a8343d88fe10f53bffd4674__20220113_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-3770b6e0', 'hca_prod_4f4f0193ede84a828cb07a0a22f06e63__20230427_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-538dafe5', 'hca_prod_50151324f3ed435898afec352a940a61__20220113_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-acb1f8c3', 'hca_prod_50154d1e230844bf960810c7afaa560b__20230427_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-517d86dc', 'hca_prod_51f02950ee254f4b8d0759aa99bb3498__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-1b8e989c', 'hca_prod_58028aa80ed249cab60f15e2ed5989d5__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-29937178', 'hca_prod_5a54c6170eed486e8c1a8a8041fc1729__20230616_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-4554e0e3', 'hca_prod_5f1a1aee6c484dd4a2c4eb4ca6aadf74__20240903_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-e255bf81', 'hca_prod_5f607e50ba224598b1e9f3d9d7a35dcc__20230201_dcp2_20241115_dcp44'),
-    mksrc('bigquery', 'datarepo-4383dd44', 'hca_prod_60109425a6e64be1a3bc15de680317d4__20240327_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-3689cbc4', 'hca_prod_60ea42e1af4942f58164d641fdb696bc__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-f2c9e5bf', 'hca_prod_615158205bb845d08d12f0850222ecf0__20221007_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-9b1ffe66', 'hca_prod_645b20c95ed0450086b57aef770d010a__20230929_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-fe3604ca', 'hca_prod_660fc8b58fb840508c57e6313195bc81__20240806_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-fe39b414', 'hca_prod_66d7d92ad6c5492c815bf81c7c93c984__20220906_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-b0222171', 'hca_prod_6836c1e4906b4c34a11ccb025167896d__20240327_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-5107f4a2', 'hca_prod_6874b7eb344547ec877375141430e169__20231213_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-a1a451e6', 'hca_prod_69324a96a68a4514bbb4f8f3ea4bd0f1__20240327_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-80c360f9', 'hca_prod_6936da41369246bbbca1cd0f507991e9__20230905_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-3c354503', 'hca_prod_6ac8e777f9a04288b5b0446e8eba3078__20220303_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-c6da3d05', 'hca_prod_6e522b939b704f0c9990b9cff721251b__20230313_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-153c5b86', 'hca_prod_6f89a7f38d4a4344aa4feccfe7e91076__20220213_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-7d510074', 'hca_prod_73769e0a5fcd41f4908341ae08bfa4c1__20220330_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-1b612d2f', 'hca_prod_750b455ae3cf472195818609a6c9d561__20240327_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-ac75d2f9', 'hca_prod_769a08d1b8a44f1e95f76071a9827555__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-dce10d00', 'hca_prod_77c13c40a5984036807fbe09209ec2dd__20230427_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-18dc896d', 'hca_prod_783c9952a4ae4106a6ce56f20ce27f88__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-644c3917', 'hca_prod_7a8d45f1353b45088e8965a96785b167__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-8d379d5d', 'hca_prod_7ac8822c4ef04194adf074290611b1c6__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-0ef114d8', 'hca_prod_7adede6a0ab745e69b67ffe7466bec1f__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-9c685691', 'hca_prod_7bc1f14b5e644c7f86b023596b97e2aa__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-ae7a821e', 'hca_prod_7c5990297a3c4b5c8e79e72c9a9a65fe__20230427_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-91bd47d7', 'hca_prod_7f351a4cd24c4fcd9040f79071b097d0__20220906_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-a90007b2', 'hca_prod_8185730f411340d39cc3929271784c2b__20220117_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-baff1e7b', 'hca_prod_85c0d6faf1174d76b01a5d5e8f5f9188__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-4e239288', 'hca_prod_86fe0a0c88b34a3e94a16f9feadc401e__20240327_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-30f63e1c', 'hca_prod_87f519b4886241f9acff75e823e0e430__20240301_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-f391a682', 'hca_prod_888f17664c8443bb8717b5f9d2046097__20240903_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-5a9f99a9', 'hca_prod_894ae6ac5b4841a8a72f315a9b60a62e__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-cf1a6aa5', 'hca_prod_8b9cb6ae6a434e47b9fb3df7aeec941f__20220906_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-1c32d243', 'hca_prod_902dc0437091445c9442d72e163b9879__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-346ec822', 'hca_prod_91674dcf864140e6978dc1706feffba8__20231101_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-4f521c22', 'hca_prod_923d323172954184b3f6c3082766a8c7__20220906_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-f8c1c1ea', 'hca_prod_925f9a4ccac0444aad2c612656ab3a85__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-11b5e221', 'hca_prod_92afaa56d501481ea027dddd72212ba8__20230526_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-5b5b0ae7', 'hca_prod_9483c664d5464b309ba3efbdbf9290b4__20240301_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-f6d99590', 'hca_prod_94e4ee099b4b410a84dca751ad36d0df__20220519_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-6b371ac5', 'hca_prod_957261f72bd64358a6ed24ee080d5cfc__20220330_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-ca228387', 'hca_prod_95d058bc9cec4c888d2c05b4a45bf24f__20230201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-08513423', 'hca_prod_95f07e6e6a734e1ba880c83996b3aa5c__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-02ac07f3', 'hca_prod_990d251f6dab4a98a2b66cfe7e4708b9__20221101_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-6adaba0e', 'hca_prod_99101928d9b14aafb759e97958ac7403__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-bd836b40', 'hca_prod_9b876d3107394e969846f76e6a427279__20220906_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-47e132f5', 'hca_prod_9bef1e81e5d94ece81cbab7449232021__20241104_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-8e2c06ed', 'hca_prod_9dd91b6e7c6249d3a3d474f603deffdb__20240903_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-b45fddfb', 'hca_prod_a4f154f85cc940b5b8d7af90afce8a8f__20230526_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-18b2a11c', 'hca_prod_ad04c8e79b7d4cceb8e901e31da10b94__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-950c161b', 'hca_prod_ae62bb3155ca4127b0fbb1771a604645__20230313_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-ca6f869a', 'hca_prod_ae71be1dddd84feb9bed24c3ddb6e1ad__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-168f955e', 'hca_prod_aebc99a33151482a9709da6802617763__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-26751345', 'hca_prod_b176d75662d8493383a48b026380262f__20240903_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-4607bc00', 'hca_prod_b208466a6fb043858cfb8e03ff6b939e__20230616_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-6d0ddc88', 'hca_prod_b91c623b19454727b1670a93027b0d3f__20230616_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-49f33629', 'hca_prod_b963bd4b4bc14404842569d74bc636b8__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-8673c791', 'hca_prod_bc5512cc95444aa48b758af445ee2257__20230614_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-79899546', 'hca_prod_bcdf233f92464c0c98430514120b7e3a__20240503_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-f0f918aa', 'hca_prod_bd40033154b94fccbff66bb8b079ee1f__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-92f46628', 'hca_prod_c0fecf0baf8641b8ba82d5fd81b7542a__20240301_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-c0391f5d', 'hca_prod_c211fd49d9804ba18c6ac24254a3cb52__20220303_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-ea95f69b', 'hca_prod_c41dffbfad83447ca0e113e689d9b258__20220118_dcp2_20241112_dcp44'),
-    mksrc('bigquery', 'datarepo-ee90ca9d', 'hca_prod_c4e1136978d44d29ba8eb67907c4c65c__20220630_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-67c06dec', 'hca_prod_c5ca43aa3b2b42168eb3f57adcbc99a1__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-40e3cc19', 'hca_prod_c6ad8f9bd26a4811b2ba93d487978446__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-1373f076', 'hca_prod_c6ef0270eafc43bd8097c10020a03cfc__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-aaf93620', 'hca_prod_c893cb575c9f4f26931221b85be84313__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-33089fb6', 'hca_prod_c9e83418a9f04ed1ab4f56d9513417bf__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-5d94c9fa', 'hca_prod_cae461deecbd482fa5d411d607fc12ba__20230526_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-b96ab3a4', 'hca_prod_cbd2911f252b4428abde69e270aefdfc__20230201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-e139b9c0', 'hca_prod_cbd3d2769f244af98381b11f6cdbdc4b__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-a3e9f4f3', 'hca_prod_ccc3b7861da0427fa45f76306d6143b6__20240503_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-3eb0c0d8', 'hca_prod_cd61771b661a4e19b2696e5d95350de6__20220213_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-40f156cd', 'hca_prod_cdabcf0b76024abf9afb3b410e545703__20230201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-c2bf25d4', 'hca_prod_cddab57b68684be4806f395ed9dd635a__20220118_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-3bc1fca5', 'hca_prod_d3446f0c30f34a12b7c36af877c7bb2d__20220119_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-70f84f46', 'hca_prod_d8ae869c39c24cddb3fc2d0d8f60e7b8__20230313_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-daeda853', 'hca_prod_da77bd0643ae4012a774e4d62797df51__20231212_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-aaf5b372', 'hca_prod_daa371e81ec343ef924f896d901eab6f__20220519_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-258af604', 'hca_prod_daef3fda262045aea3f71613814a35bf__20240503_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-89bbe20f', 'hca_prod_dcbb50d19acf4f709fdab1f63a948c49__20221101_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-e18d4e0f', 'hca_prod_e090445c69714212bc5fae4ec3914102__20230427_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-afd111d1', 'hca_prod_e1fda2177ee14c1aadfa648279dafac6__20240201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-7a453efe', 'hca_prod_e456c042f6b64ceca3381a8ef80bd779__20230905_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-a81f6726', 'hca_prod_e5fe827437694d7daa356d33c226ab43__20230616_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-973f472d', 'hca_prod_e77fed30959d4fadbc15a0a5a85c21d2__20220119_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-eca4f16f', 'hca_prod_e870ab5635374b6da66f534fbf8cc57f__20240806_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-f401aa72', 'hca_prod_e956e66aac8e483a963a0f92c7e5abfb__20230313_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-e61457b8', 'hca_prod_e9f36305d85744a393f0df4e6007dc97__20220519_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-cd1f7849', 'hca_prod_f2078d5f2e7d48448552f7c41a231e52__20230201_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-7d26acbb', 'hca_prod_f86f1ab41fbb4510ae353ffd752d4dfc__20220119_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-b137f55d', 'hca_prod_fae72d894ac44aab9b93574775e168d4__20230530_dcp2_20241107_dcp44'),
-    mksrc('bigquery', 'datarepo-44e393e0', 'hca_prod_fcaa53cdba574bfeaf9ceaa958f95c1a__20220906_dcp2_20241107_dcp44'),
-    # @formatter:on
-]))
-
-dcp45_sources = mkdict(dcp44_sources, 484, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-44d8565d', 'hca_prod_005d611a14d54fbf846e571a1f874f70__20220111_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-46f6eb97', 'hca_prod_01aacb6840764fd99eb9aba0f48c1b5a__20230616_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-6130b203', 'hca_prod_08fb10df32e5456c9882e33fcd49077a__20231212_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-94e55f6a', 'hca_prod_2433c1e45a1246a5b2d5d3554a7694f2__20241205_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-bcdeba16', 'hca_prod_40bb5783c9244d19b6cbd26a8d3ae1d8__20241205_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-059abc16', 'hca_prod_474a4229840e4d6382af8d3aa615ee17__20241205_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-8d869988', 'hca_prod_7fc0a7569b064e63a7806c9fc3f9d76d__20241205_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-f7e93b13', 'hca_prod_9762d70c9b274f578cbc377b9b92ea9b__20241205_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-66f332bc', 'hca_prod_ae9f439bbd474d6ebd7232dc70b35d97__20241004_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-48f5e511', 'hca_prod_df8eb7ce370746afb823e081a562e954__20241205_dcp2_20241205_dcp45'),
-    mksrc('bigquery', 'datarepo-6c94b83b', 'hca_prod_e255b1c611434fa683a8528f15b41038__20220330_dcp2_20241205_dcp45'),
-    # @formatter:on
-]))
-
-dcp46_sources = mkdict(dcp45_sources, 489, mkdelta([
-    # @formatter:off
-    mksrc('bigquery', 'datarepo-6ecaa13f', 'hca_prod_031980e69f2b433a8f6e081bd9aad0a3__20250203_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-2be1e691', 'hca_prod_0777b9ef91f3468b9deadb477437aa1a__20220330_dcp2_20230314_dcp25', pop), # noqa E501
-    mksrc('bigquery', 'datarepo-e8dc17ba', 'hca_prod_0cc58d0b17344e1d9113b32e52f75e36__20240531_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-88134e98', 'hca_prod_102018327c7340339b653ef13d81656a__20220213_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-4e5e9f9b', 'hca_prod_2043c65a1cf84828a6569e247d4e64f1__20220111_dcp2_20220120_dcp12', pop), # noqa E501
-    mksrc('bigquery', 'datarepo-c3a6307c', 'hca_prod_2433c1e45a1246a5b2d5d3554a7694f2__20241205_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-3928cdc0', 'hca_prod_24d0dbbc54eb49048141934d26f1c936__20220303_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-996ef22b', 'hca_prod_3089d311f9ed44ddbb10397059bad4dc__20220111_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-51fe137d', 'hca_prod_3373e59c525f4a838c9cd8b280454697__20241104_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-3d34fbb4', 'hca_prod_3bb95c61630d4970ae408a951bd11cc1__20250203_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-29421115', 'hca_prod_40bb5783c9244d19b6cbd26a8d3ae1d8__20241205_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-180ff3a3', 'hca_prod_46a7e4bf04744a8f8d1843afcde90491__20240327_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-0e25311c', 'hca_prod_474a4229840e4d6382af8d3aa615ee17__20241205_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-1e1baa84', 'hca_prod_50151324f3ed435898afec352a940a61__20220113_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-d45e266f', 'hca_prod_57916660af5a44d5a7a92e84b65f8a68__20250203_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-ec04039e', 'hca_prod_5f607e50ba224598b1e9f3d9d7a35dcc__20230201_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-cc39ad0b', 'hca_prod_60109425a6e64be1a3bc15de680317d4__20240327_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-b46d2c92', 'hca_prod_60ea42e1af4942f58164d641fdb696bc__20220117_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-f0716cdf', 'hca_prod_6836c1e4906b4c34a11ccb025167896d__20240327_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-bab0d9fc', 'hca_prod_6f89a7f38d4a4344aa4feccfe7e91076__20220213_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-b2c70f75', 'hca_prod_750b455ae3cf472195818609a6c9d561__20240327_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-ec282ab2', 'hca_prod_783c9952a4ae4106a6ce56f20ce27f88__20220117_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-ff62a88c', 'hca_prod_7b393e4d65bc4c03b402aae769299329__20220519_dcp2_20230314_dcp25', pop), # noqa E501
-    mksrc('bigquery', 'datarepo-a2ed4715', 'hca_prod_7f980afc4e014b9ca9a1f50e17fce8c2__20250203_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-644d80c0', 'hca_prod_84d1697fd4af42c29a5037fb5842c586__20250203_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-226acb8e', 'hca_prod_9dd91b6e7c6249d3a3d474f603deffdb__20240903_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-9f7d0bf5', 'hca_prod_9f7aa40170e34695951a30541a1434eb__20250203_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-418bfa3b', 'hca_prod_ccc3b7861da0427fa45f76306d6143b6__20240503_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-1fdfaac3', 'hca_prod_cdabcf0b76024abf9afb3b410e545703__20230201_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-8e29c61a', 'hca_prod_d3446f0c30f34a12b7c36af877c7bb2d__20220119_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-ff63dc94', 'hca_prod_dcbb50d19acf4f709fdab1f63a948c49__20221101_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-b72037cb', 'hca_prod_e255b1c611434fa683a8528f15b41038__20220330_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-1514f0eb', 'hca_prod_e579d0f418ef4d4290fa4d2afb60a862__20250203_dcp2_20250211_dcp46'),
-    mksrc('bigquery', 'datarepo-753fc76a', 'hca_prod_f77290ae0d7b4239b0fe3cf2c9e8858d__20250203_dcp2_20250211_dcp46'),
-    # @formatter:on
-]))
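Every snapshot name in these deltas follows the same apparent convention: an atlas and deployment prefix (`hca_prod`), the project UUID with dashes removed, then a double-underscore-separated suffix giving the project's original snapshot date, the metadata schema generation (`dcp2`), the current snapshot date, and the catalog the snapshot belongs to. A small, illustrative parser under that reading of the fields follows; the field names are descriptive assumptions, not identifiers from the deleted file::

    # Split a snapshot name like
    # 'hca_prod_0777b9ef91f3468b9deadb477437aa1a__20220330_dcp2_20230314_dcp25'
    # into its apparent fields. Field names are assumptions for illustration.
    def parse_snapshot(name):
        prefix, suffix = name.split('__', 1)
        atlas, deployment, project_hex = prefix.split('_', 2)
        first_date, schema, last_date, catalog = suffix.split('_', 3)
        return {
            'atlas': atlas,            # 'hca'
            'deployment': deployment,  # 'prod'
            'project': project_hex,    # project UUID without dashes
            'first_snapshot': first_date,
            'schema': schema,          # 'dcp2'
            'last_snapshot': last_date,
            'catalog': catalog,        # e.g. 'dcp25'
        }

For example, the `pop` entries above retire snapshots whose catalog field still reads `dcp25` or `dcp12`, i.e. sources that were never re-snapshotted for a newer catalog.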
-
-dcp47_sources = mkdict(dcp46_sources, 494, mkdelta([
-    mksrc('bigquery', 'datarepo-3c5378af', 'hca_prod_03c6fce7789e4e78a27a664d562bb738__20220110_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-2590222b', 'hca_prod_04ad400c58cb40a5bc2b2279e13a910b__20220114_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-f35c4c99', 'hca_prod_04e4292cf62f4098ae9bfd69ae002a90__20230427_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-7926539f', 'hca_prod_0562d2ae0b8a459ebbc06357108e5da9__20220330_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-96dd015a', 'hca_prod_05657a599f9d4bb9b77b24be13aa5cea__20220110_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-f1f90008', 'hca_prod_05be4f374506429bb112506444507d62__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-e4925842', 'hca_prod_06c7dd8d6cc64b79b7958805c47d36e1__20220213_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-32ae3e6c', 'hca_prod_07073c1280064710a00b23abdb814904__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-d6204096', 'hca_prod_074a9f88729a455dbca50ce80edf0cea__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-e784e4fd', 'hca_prod_0751843070314bdfa3ce1bf0917a1923__20221208_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-b203eddb', 'hca_prod_0777b9ef91f3468b9deadb477437aa1a__20220330_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-a4d50172', 'hca_prod_0792db3480474e62802c9177c9cd8e28__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-4b0b26b3', 'hca_prod_08b794a0519c4516b184c583746254c5__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-2aaefe9a', 'hca_prod_0911cc0406d64ffc8318b90b0039e8ad__20230905_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-e8a91d58', 'hca_prod_091cf39b01bc42e59437f419a66c8a45__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-4c79cb59', 'hca_prod_0b29914025b54861a69f7651ff3f46cf__20220519_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-c57aaae4', 'hca_prod_0c09fadee0794fde8e606725b8c1d84b__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-085859a8', 'hca_prod_0c3b7785f74d40918616a68757e4c2a8__20220111_dcp2_20250305_dcp47'),
-    mksrc('bigquery', 'datarepo-770a569a', 'hca_prod_0d4aaaac02c344c48ae04465f97f83ed__20221101_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-6d9a8207', 'hca_prod_0d4b87ea6e9e456982e41343e0e3259f__20220110_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-e270be56', 'hca_prod_0efecd202b524e4f96c59b4b94158713__20230614_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-93aa3444', 'hca_prod_0fd8f91862d64b8bac354c53dd601f71__20220110_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-c009c4fa', 'hca_prod_116965f3f09447699d28ae675c1b569c__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-364b83a2', 'hca_prod_165dea71a95a44e188cdb2d9ad68bb1e__20220303_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-a1039074', 'hca_prod_1688d7cc6f5c49efb353e308b61d4e4c__20230313_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-ce0a0e21', 'hca_prod_16cd67912adb4d0f82220184dada6456__20220519_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-39ccfc9c', 'hca_prod_16e9915978bc44aab47955a5e903bf50__20221101_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-78e6040d', 'hca_prod_18d4aae283634e008eebb9e568402cf8__20220330_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-21acc033', 'hca_prod_18e5843776b740218ede3f0b443fa915__20220519_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-8105677b', 'hca_prod_1cd1f41ff81a486ba05b66ec60f81dcf__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-cb0d39a0', 'hca_prod_1ce3b3dc02f244a896dad6d107b27a76__20220107_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-8b9a1bbf', 'hca_prod_1defdadaa36544ad9b29443b06bd11d6__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-eee8ecce', 'hca_prod_1eb69a39b5b241ecafae5fe37f272756__20220213_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-80477604', 'hca_prod_1ebe8c34454e4c28bd713a3e8b127be4__20221208_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-40766c2f', 'hca_prod_1fa8b11f56fa45a6a7776af70e17a6b3__20220928_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-fa586d18', 'hca_prod_1fac187b1c3f41c4b6b66a9a8c0489d1__20230427_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-1ffce377', 'hca_prod_2084526ba66f4c40bb896fd162f2eb38__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-05637602', 'hca_prod_2086eb0510b9432bb7f0169ccc49d270__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-31f6c2b1', 'hca_prod_20f37aafcaa140e69123be6ce8feb2d6__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-26ec96aa', 'hca_prod_2253ae594cc54bd2b44eecb6d3fd7646__20220519_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-bfed6fd4', 'hca_prod_23587fb31a4a4f58ad74cc9a4cb4c254__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-cb6b77e6', 'hca_prod_248fcf0316c64a41b6ccaad4d894ca42__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-790b4ce9', 'hca_prod_24c654a5caa5440a8f02582921f2db4a__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-d115c83c', 'hca_prod_258c5e15d1254f2d8b4ce3122548ec9b__20221208_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-febeb27d', 'hca_prod_272b760266cd4b02a86b2b7c9c51a9ea__20230526_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-c22d601a', 'hca_prod_279f176633194e3c9f996fb59ba9b3e5__20230815_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-ec4e5100', 'hca_prod_2837165560ba449ea3035859b29ead65__20221101_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-585ee422', 'hca_prod_2973a42cf81048129a235bbc9644588d__20230614_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-1956d072', 'hca_prod_2a64db431b554639aabb8dba0145689d__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-78487f46', 'hca_prod_2a72a4e566b2405abb7c1e463e8febb0__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-2c545be5', 'hca_prod_2ad191cdbd7a409b9bd1e72b5e4cce81__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-fdde7aeb', 'hca_prod_2b38025da5ea4c0fb22e367824bcaf4c__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-0f359191', 'hca_prod_2b81ecc46ee0438f8c5bc10b2464069e__20221101_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-6a2aed44', 'hca_prod_2c041c26f75a495fab36a076f89d422a__20220303_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-3db654ee', 'hca_prod_2d559a6e7cd9432f9f6e0e4df03b0888__20230614_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-34e5d3e0', 'hca_prod_2d8460958a334f3c97d4585bafac13b4__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-570e9946', 'hca_prod_2eb4f5f842a54368aa2d337bacb96197__20220606_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-4391a4ff', 'hca_prod_2f67614380c24bc6b7b42613fe0fadf0__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-cc13a8b0', 'hca_prod_326b36bd0975475f983b56ddb8f73a4d__20230905_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-1fd78084', 'hca_prod_34c9a62ca6104e31b3438fb7be676f8c__20221101_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-df6f3ce2', 'hca_prod_34cba5e9ecb14d81bf0848987cd63073__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-6a24f127', 'hca_prod_34da2c5f801148afa7fdad2f56ec10f4__20220606_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-fd090af1', 'hca_prod_34ec845bcd7a4c4399e4d2932d5d85bb__20220928_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-103f6ed4', 'hca_prod_376a7f55b8764f609cf3ed7bc83d5415__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-df24d80d', 'hca_prod_379ed69ebe0548bcaf5ea7fc589709bf__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-73262b6b', 'hca_prod_38449aea70b540db84b31e08f32efe34__20220111_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-7c8ba452', 'hca_prod_38e3413162fc4323b43515113dfd6dcc__20250228_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-e2ece787', 'hca_prod_38e44dd0c3df418e9256d0824748901f__20220112_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-c3ace62f', 'hca_prod_3a69470330844ece9abed935fd5f6748__20220112_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-204432cf', 'hca_prod_3c27d2ddb1804b2bbf05e2e418393fd1__20220112_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-11a2ab55', 'hca_prod_3c9d586ebd264b4686903faaa18ccf38__20220729_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-2651413e', 'hca_prod_3cdaf942f8ad42e8a77b4efedb9ea7b6__20220303_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-de4d114b', 'hca_prod_3cfcdff5dee14a7ba591c09c6e850b11__20220112_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-b6823d07', 'hca_prod_3e329187a9c448ec90e3cc45f7c2311c__20220112_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-50f846fa', 'hca_prod_3e92c74d256c40cd927316f155da8342__20220729_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-1a165fa0', 'hca_prod_4037007b0eff4e6db7bd8dd8eec80143__20220112_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-8184d295', 'hca_prod_403c3e7668144a2da5805dd5de38c7ff__20220113_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-1bf756f5', 'hca_prod_40ca2a03ec0f471fa834948199495fe7__20220330_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-3acd6add', 'hca_prod_414accedeba0440fb721befbc5642bef__20220113_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-4d222ee6', 'hca_prod_41fb1734a121461695c73b732c9433c7__20220113_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-cef20640', 'hca_prod_421bc6cdbbb44398ac60a32ea94f02ae__20230929_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-6787198d', 'hca_prod_42d4f8d454224b78adaee7c3c2ef511c__20220113_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-bc91141c', 'hca_prod_45c2c853d06f4879957ef1366fb5d423__20220303_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-a516c1e4', 'hca_prod_4627f43ea43f44dd8c4b7efddb3f296d__20230501_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-1b8ad176', 'hca_prod_48b198ef3d594e57900fdf54c2435669__20221208_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-c08474ae', 'hca_prod_48f60534ba4e45bcaa5b6d3a6c45962e__20230929_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-d69a3fbc', 'hca_prod_4af795f73e1d4341b8674ac0982b9efd__20220113_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-a748d93f', 'hca_prod_4c73d1e4bad24a22a0ba55abbdbdcc3d__20220906_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-0d617d9f', 'hca_prod_4d9d56e4610d4748b57df8315e3f53a3__20220729_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-63d32334', 'hca_prod_4e6f083b5b9a439398902a83da8188f1__20220113_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-2a2d1eea', 'hca_prod_4ef86852aca04a9185229968e0e54dbe__20230313_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-93268fac', 'hca_prod_4f17edf6e9f042afa54af02fdca76ade__20220606_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-ef47026a', 'hca_prod_504e0cee168840fab936361c4a831f87__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-b556a56f', 'hca_prod_5116c0818be749c58ce073b887328aa9__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-4fa6bd68', 'hca_prod_520afa10f9d24e93ab7a26c4c863ce18__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-fe73ea6e', 'hca_prod_52b29aa4c8d642b4807ab35be94469ca__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-53d53d6f', 'hca_prod_52d10a60c8d14d068a5eaf0d5c0d5034__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-ecf619d2', 'hca_prod_53c53cd481274e12bc7f8fe1610a715c__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-4ca27b9a', 'hca_prod_54aaa409dc2848c5be26d368b4a5d5c6__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-932ab85b', 'hca_prod_559bb888782941f2ace52c05c7eb81e9__20220117_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-76ead5af', 'hca_prod_64809a52f7034aecb3a5eca808a971d0__20250227_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-bf994170', 'hca_prod_90588227d8c147eea3970d0b1d79aea9__20250301_dcp2_20250304_dcp47'),
-    mksrc('bigquery', 'datarepo-660076de', 'hca_prod_fc2a0b4e1e4a447ba09747b398402f37__20250227_dcp2_20250304_dcp47'),
-]))
-
-dcp48_sources = mkdict(dcp47_sources, 499, mkdelta([
-    mksrc('bigquery', 'datarepo-011bff46', 'hca_prod_0d737cce1c1c493a8e2eb00143bccc12__20230616_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a9a9ad32', 'hca_prod_50db6ba439864d5586b7e1a5a888a17b__20250404_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-e3d00fca', 'hca_prod_566d00b0e1f84b929cbd57de9fad0050__20230427_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a05eee72', 'hca_prod_56e73ccb7ae94faea738acfb69936d7a__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-2f505c90', 'hca_prod_575c0ad9c78e469b9fdf9a68dd881137__20220928_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-0a11e1ec', 'hca_prod_57a2c2deb0d4465abe53a41e59e75fab__20230526_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-07c9f071', 'hca_prod_591af954cdcd483996d3a0d1b1e885ac__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-d6dafbeb', 'hca_prod_5b5f05b72482468db76d8f68c04a7a47__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-6afd7353', 'hca_prod_5b910a437fb54ea7b9d643dbd1bf2776__20220729_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-02558f60', 'hca_prod_5bb1f67e2ff04848bbcf17d133f0fd2d__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-92539b02', 'hca_prod_5bd01deb01ee46118efdcf0ec5f56ac4__20231101_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-75e2bf1e', 'hca_prod_5ee710d7e2d54fe2818d15f5e31dae32__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-f8401f40', 'hca_prod_5f44a860d96e4a99b67e24e1b8ccfd26__20230427_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-b3417b02', 'hca_prod_602628d7c03848a8aa97ffbb2cb44c9d__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-f4fbdb56', 'hca_prod_6072616c87944b208f52fb15992ea5a4__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a53e10b7', 'hca_prod_63b5b6c1bbcd487d8c2e0095150c1ecd__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-f6cda77e', 'hca_prod_65858543530d48a6a670f972b34dfe10__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-09d3c9a0', 'hca_prod_65cbfea55c544255a1d014549a86a5c1__20230616_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-aa587adb', 'hca_prod_65d7a1684d624bc083244e742aa62de6__20220330_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-16e3113c', 'hca_prod_6621c827b57a4268bc80df4049140193__20220330_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-37e49b45', 'hca_prod_6663070ffd8b41a9a4792d1e07afa201__20220519_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-c3fbd449', 'hca_prod_6735ff731a04422eb500730202e46f8a__20231212_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-fb0a665c', 'hca_prod_67a3de0945b949c3a068ff4665daa50e__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-718fee68', 'hca_prod_6874b7eb344547ec877375141430e169__20231213_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-c3f1b532', 'hca_prod_68df3629d2d24eedb0aba10e0f019b88__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-52a537d5', 'hca_prod_6c040a938cf84fd598de2297eb07e9f6__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-038a63e9', 'hca_prod_6e1771950ac0468b99a287de96dc9db4__20230503_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-41271673', 'hca_prod_6e60a555fd954aa28e293ec2ef01a580__20230206_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-62e880a9', 'hca_prod_6f03e4ad93054bfaa5b6929ffb1d94bd__20230313_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-7e726ec8', 'hca_prod_7027adc6c9c946f384ee9badc3a4f53b__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-61b7cf4b', 'hca_prod_71436067ac414acebe1b2fbcc2cb02fa__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-92d177e0', 'hca_prod_71eb5f6dcee04297b503b1125909b8c7__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-7b914023', 'hca_prod_72ff481856924bbc8886e47763531023__20230929_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-997f44d8', 'hca_prod_739ef78aba5d4487a0139982db66d222__20230815_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-af8ea80b', 'hca_prod_74493e9844fc48b0a58fcc7e77268b59__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-f8f965ea', 'hca_prod_74b6d5693b1142efb6b1a0454522b4a0__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-ac5d901a', 'hca_prod_74e2ef9d7c9f418cb2817fb38f3b1571__20220906_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-344e8c1f', 'hca_prod_77423e580fbb495a9ec2bd9a8010f21d__20230526_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-d7f7634c', 'hca_prod_77780d5603c0481faade2038490cef9f__20220330_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-706c1d29', 'hca_prod_77dedd59137648879bcadc42b56d5b7a__20230201_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-4be2efa6', 'hca_prod_7880637a35a14047b422b5eac2a2a358__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-57a0285d', 'hca_prod_78b2406dbff246fc8b6120690e602227__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-3da5180a', 'hca_prod_78d7805bfdc8472b8058d92cf886f7a4__20220213_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-29f89b5e', 'hca_prod_79351583b21244bab473731bdcddb407__20221208_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-e93f812d', 'hca_prod_79b13a2a9ca142a497bd70208a11bea6__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-811ba6a9', 'hca_prod_7b393e4d65bc4c03b402aae769299329__20220519_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-d6382230', 'hca_prod_7b947aa243a74082afff222a3e3a4635__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-b51e3bae', 'hca_prod_7be050259972493a856f3342a8d1b183__20220606_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-7b552ec1', 'hca_prod_7c75f07c608d4c4aa1b7b13d11c0ad31__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-65cab7fa', 'hca_prod_7dcffc327c8243969a4f88b5579bfe8a__20230905_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-2278b52b', 'hca_prod_7f9766ffbb124279b34078d140bdd7ba__20230905_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-7a1e692a', 'hca_prod_83f5188e3bf749569544cea4f8997756__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-f5c9343a', 'hca_prod_842605c7375a47c59e2ca71c2c00fcad__20220117_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-df8b06b3', 'hca_prod_849ed38c591743c4a8f90782241cf10c__20231101_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-9aa73d3d', 'hca_prod_8559a8ed5d8c4fb6bde8ab639cebf03c__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-170f553e', 'hca_prod_85a9263b088748edab1addfa773727b6__20220224_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-c4f9feb6', 'hca_prod_8787c23889ef4636a57d3167e8b54a80__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-c3077d46', 'hca_prod_88ec040b87054f778f41f81e57632f7d__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-9936e573', 'hca_prod_896f377c8e88463e82b0b2a5409d6fe4__20240201_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-e6850c04', 'hca_prod_8999b4566fa6438bab17b62b1d8ec0c3__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-53696f00', 'hca_prod_8a40ff19e6144c50b23b5c9e1d546bab__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-faf405bc', 'hca_prod_8b954fb2bccb44c584e39f91e9189c40__20230526_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-21732d04', 'hca_prod_8bd2e5f694534b9b9c5659e3a40dc87e__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-4b293efc', 'hca_prod_8c3c290ddfff4553886854ce45f4ba7f__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-f45b82af', 'hca_prod_8d566d35d8d34975a351be5e25e9b2ea__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-fc66dedf', 'hca_prod_8dacb243e9184bd2bb9aaac6dc424161__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-64d8f81f', 'hca_prod_8f1f653d3ea14d8eb4a7b97dc852c2b1__20230815_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-950102ed', 'hca_prod_8f630e0f6bf94a04975402533152a954__20220729_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a9b5e225', 'hca_prod_90bd693340c048d48d76778c103bf545__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-d50ff5b7', 'hca_prod_91af6e2f65f244ec98e0ba4e98db22c8__20220303_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-47836ad0', 'hca_prod_92892ab213344b1c976114f5a73548ea__20230616_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-ba72f23e', 'hca_prod_946c5add47d1402a97bba5af97e8bce7__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-bb5be846', 'hca_prod_955dfc2ca8c64d04aa4d907610545d11__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-f6f752c9', 'hca_prod_962bd805eb894c54bad2008e497d1307__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-8c28a376', 'hca_prod_9746f4e0d3b2454389b310288162851b__20230526_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-580e5870', 'hca_prod_9833669bd6944b93a3d06b6f9dbcfc10__20220630_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a613ddaf', 'hca_prod_996120f9e84f409fa01e732ab58ca8b9__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-ff8ecfae', 'hca_prod_9a23ac2d93dd4bac9bb8040e6426db9d__20220906_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-dd8fce65', 'hca_prod_9ac53858606a4b89af49804ccedaa660__20220906_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-e28e8e7f', 'hca_prod_9d97f01f9313416e9b07560f048b2350__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-35f5e987', 'hca_prod_9e3370a0144a49a99e926f6a9290125a__20221101_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-64ce279b', 'hca_prod_a1312f9a01ef40a789bf9091ca76a03a__20220729_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-e7d9713d', 'hca_prod_a27dd61925ad46a0ae0c5c4940a1139b__20220606_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-837925a2', 'hca_prod_a29952d9925e40f48a1c274f118f1f51__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-949ff561', 'hca_prod_a2a2f324cf24409ea859deaee871269c__20220330_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-c7c0c1c0', 'hca_prod_a39728aa70a04201b0a281b7badf3e71__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-9fe8f7b5', 'hca_prod_a60803bbf7db45cfb52995436152a801__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-3307c8fa', 'hca_prod_a62dae2ecd694d5cb5f84f7e8abdbafa__20220606_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-d1bd4420', 'hca_prod_a7c66eb14a4e4f6c9e30ad2a485f8301__20220906_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-c44d45f6', 'hca_prod_a80a63f2e223489081b0415855b89abc__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-9287b7fb', 'hca_prod_a815c84b8999433f958e422c0720e00d__20220330_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-46272509', 'hca_prod_a9301bebe9fa42feb75c84e8a460c733__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-cd174e08', 'hca_prod_a96b71c078a742d188ce83c78925cfeb__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-304658c8', 'hca_prod_a991ef154d4a4b80a93ec538b4b54127__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-dc22e9aa', 'hca_prod_a9c022b4c7714468b769cabcf9738de3__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-b2e26e38', 'hca_prod_a9f5323ace71471c9caf04cc118fd1d7__20220606_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-afaed7b9', 'hca_prod_aa55000c016848d890262d3a76ec8af3__20230427_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-65fbb09a', 'hca_prod_abe1a013af7a45ed8c26f3793c24a1f4__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-41f0fa20', 'hca_prod_ac289b77fb124a6bad43c0721c698e70__20220906_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-943f36a2', 'hca_prod_ad3c5c48ad2843fd9ec09b7a87e0ee6d__20250404_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-b2b70aaf', 'hca_prod_aecfd908674c4d4eb36e0c1ceab02245__20231101_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a3ea9423', 'hca_prod_aefb919243fc46d7a4c129597f7ef61b__20220330_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-624d3c74', 'hca_prod_aff9c3cd6b844fc2abf2b9c0b3038277__20220330_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-ccefbdda', 'hca_prod_b4a7d12f6c2f40a39e359756997857e3__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-bfa6cc71', 'hca_prod_b51f49b40d2e4cbdbbd504cd171fc2fa__20220118_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a261829a', 'hca_prod_cb4e5e44d8984819a456393527c3186f__20250404_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-ca45ca22', 'hca_prod_f0f89c1474604bab9d4222228a91f185__20220119_dcp2_20250415_dcp48'),
-    mksrc('bigquery', 'datarepo-a96f0164', 'hca_prod_fc2a0b4e1e4a447ba09747b398402f37__20250227_dcp2_20250415_dcp48'),
-]))
-
-dcp49_sources = mkdict(dcp48_sources, 501, mkdelta([
-    mksrc('bigquery', 'datarepo-f60f4b83', 'hca_prod_08c7910b5ebb4dfca8665bf392ef3b36__20250513_dcp2_20250514_dcp49'),
-    mksrc('bigquery', 'datarepo-f2653856', 'hca_prod_4bdaedeb99ae4fb4be6957497cf98b90__20250513_dcp2_20250514_dcp49'),
-    mksrc('bigquery',
'datarepo-2cf88e44', 'hca_prod_b7259878436c4274bfffca76f4cb7892__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-8cc68fbe', 'hca_prod_b733dc1b1d5545e380367eab0821742c__20220519_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-1dd70e59', 'hca_prod_b9484e4edc404e389b854cecf5b8c068__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-7c98f55a', 'hca_prod_bd7104c9a950490e94727d41c6b11c62__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-84d9d771', 'hca_prod_be010abcfb684581b61f7dd7c3d7b044__20230314_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-2b8dcf12', 'hca_prod_bfaedc29fe844e72a46175dc8aabbd1b__20230929_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-9bdfab50', 'hca_prod_c05184453b3b49c6b8fcc41daa4eacba__20220213_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-1c111ed3', 'hca_prod_c0d82ef215044ef09e5ed8a13e45fdec__20220928_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-2c456b41', 'hca_prod_c281ab637b7d4bdfb7619b1baaa18f82__20230331_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-f543ac47', 'hca_prod_c302fe54d22d451fa130e24df3d6afca__20220606_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-a97e3c14', 'hca_prod_c31fa434c9ed4263a9b6d9ffb9d44005__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-fa09caaa', 'hca_prod_c412be53cf9547c7980cc0a0caa2d3a0__20230929_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-4d9991fa', 'hca_prod_c41dffbfad83447ca0e113e689d9b258__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-b59ebcd2', 'hca_prod_c5b475f276b34a8e8465f3b69828fec3__20230331_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-b3169144', 'hca_prod_c5f4661568de4cf4bbc2a0ae10f08243__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-7a8210cc', 'hca_prod_c6a50b2a3dfd4ca89b483e682f568a25__20220303_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-06bf8b45', 'hca_prod_c715cd2fdc7c44a69cd5b6a6d9f075ae__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-e288a8a0', 'hca_prod_c7c54245548b4d4fb15e0d7e238ae6c8__20220330_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-e8e001c9', 'hca_prod_c844538b88544a95bd01aacbaf86d97f__20230427_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-c13e222f', 'hca_prod_c8e6c5d9fcde4845beadff96999e3051__20221101_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-4e65ea34', 'hca_prod_cae461deecbd482fa5d411d607fc12ba__20230526_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-d5c5efaf', 'hca_prod_cc35f94ee93b4dbda08c702978d9046f__20231101_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-d3c8089e', 'hca_prod_cc95ff892e684a08a234480eca21ce79__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-f14e004d', 'hca_prod_ccd1f1ba74ce469b9fc9f6faea623358__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-fc4216a2', 'hca_prod_ccef38d7aa9240109621c4c7b1182647__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-46924d25', 'hca_prod_cd9d6360ce38432197dff13c79e3cb84__20230206_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-45855706', 'hca_prod_cdc2d2706c99414288839bd95c041d05__20221208_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-dde971bb', 'hca_prod_ce33dde2382d448cb6acbfb424644f23__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-42e1040f', 'hca_prod_ce7b12ba664f4f798fc73de6b1892183__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-4da4cff9', 'hca_prod_cfece4d2f18d44ada46a42bbcb5cb3b7__20230929_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-fe2f151a', 
'hca_prod_d012d4768f8c4ff389d6ebbe22c1b5c1__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-83c5cff1', 'hca_prod_d138a1147df54f7d9ff1f79dfd2d428f__20220606_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-9332a1ce', 'hca_prod_d2111fac3fc44f429b6d32cd6a828267__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-ce1acc6b', 'hca_prod_d3a4ceac4d66498497042570c0647a56__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-284edb0c', 'hca_prod_d3ac7c1b53024804b611dad9f89c049d__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-720ff634', 'hca_prod_d5c91e922e7f473d8cf3ab03bbae21c2__20240503_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-5445be31', 'hca_prod_d6225aee8f0e4b20a20c682509a9ea14__20220213_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-57e121b0', 'hca_prod_d71c76d336704774a9cf034249d37c60__20220213_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-c8afd733', 'hca_prod_d7845650f6b14b1cb2fec0795416ba7b__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-c9b6f66a', 'hca_prod_d7b7beae652b4fc09bf2bcda7c7115af__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-e3fb7ab9', 'hca_prod_da2747fa292142e0afd439ef57b2b88b__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-b376d5e4', 'hca_prod_da9d6f243bdf4eaa9e3ff47ce2a65b36__20220729_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-f19470c4', 'hca_prod_daf9d9827ce643f6ab51272577290606__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-7101311b', 'hca_prod_dbcd4b1d31bd4eb594e150e8706fa192__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-d036fb75', 'hca_prod_dbd836cfbfc241f0983441cc6c0b235a__20220212_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-03b81808', 'hca_prod_dc1a41f69e0942a6959e3be23db6da56__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-cb23ec44', 'hca_prod_dcc28fb37bab48cebc4b684c00e133ce__20230905_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-c86f2d3c', 'hca_prod_dd7f24360c564709bd17e526bba4cc15__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-5b384ff0', 'hca_prod_df88f39f01a84b5b92f43177d6c0f242__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-c89c4f2a', 'hca_prod_e0009214c0a04a7b96e2d6a83e966ce0__20220119_dcp2_20250516_dcp49'), - mksrc('bigquery', 'datarepo-af42bae1', 'hca_prod_e0c74c7a20a445059cf138dcdd23011b__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-7ef2dd07', 'hca_prod_e374c1cf73fd4a7a866979dc41714984__20230905_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-509854f1', 'hca_prod_e49e556ada5a442ab45c8691b457623e__20231212_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-4f84a64f', 'hca_prod_e4b18cd28f15490db9f1d118aa067dc3__20221101_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-375ec5ad', 'hca_prod_e526d91dcf3a44cb80c5fd7676b55a1d__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-1904c869', 'hca_prod_e57dc176ab98446b90c289e0842152fd__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-13689713', 'hca_prod_e5d455791f5b48c3b568320d93e7ca72__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-ccd9ee40', 'hca_prod_e6773550c1a6494986431a3154cf2670__20221208_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-e5bfc6c9', 'hca_prod_e8808cc84ca0409680f2bba73600cba6__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-f21e83a5', 'hca_prod_e88714c22e7849da81465a60b50628b4__20230206_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-faaa90cd', 
'hca_prod_e925633fabd9486a81c61a6a66891d23__20230526_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-64aef8e3', 'hca_prod_e993adcdd4ba4f889a05d1c05bdf0c45__20220606_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-f130f625', 'hca_prod_ea9eec5a4fc24c5894d02fcb598732bc__20221208_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-9bbed65a', 'hca_prod_eaefa1b6dae14414953b17b0427d061e__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-8196d213', 'hca_prod_ec6476ee294941f3947b8eef41d6d3ac__20220729_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-8b0f20f6', 'hca_prod_ede2e0b46652464fabbc0b2d964a25a0__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-aa834f98', 'hca_prod_ee166275f63a486481554df86c9de679__20230313_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-e0b09b24', 'hca_prod_ef1d9888fa8647a4bb720ab0f20f7004__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-33c8c690', 'hca_prod_ef1e3497515e4bbe8d4c10161854b699__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-8eb1f82a', 'hca_prod_efea6426510a4b609a19277e52bfa815__20220118_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-a1a0ebb1', 'hca_prod_f29b124a85974862ae98ff3a0fd9033e__20220303_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-40656060', 'hca_prod_f2fe82f044544d84b416a885f3121e59__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-09e2f758', 'hca_prod_f3825dfe990a431fb9719c26d39840db__20230331_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-3907996d', 'hca_prod_f48e7c39cc6740559d79bc437892840c__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-c1e3cab6', 'hca_prod_f4d011ced1f548a4ab61ae14176e3a6e__20220519_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-9b39eeeb', 'hca_prod_f6133d2a9f3d4ef99c19c23d6c7e6cc0__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-e6f8e5cb', 'hca_prod_f7b464770f2a4bffa9b7719e000499a3__20230815_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-310373da', 'hca_prod_f81efc039f564354aabb6ce819c3d414__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-1b84fc22', 'hca_prod_f83165c5e2ea4d15a5cf33f3550bffde__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-6efa2983', 'hca_prod_f8aa201c4ff145a4890e840d63459ca2__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-69fee559', 'hca_prod_fa3f460f4fb94cedb5488ba6a8ecae3f__20220330_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-6afeebdc', 'hca_prod_faeedcb0e0464be7b1ad80a3eeabb066__20220119_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-6936a062', 'hca_prod_fc381e70df1b407d813152ab523270bd__20221208_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-c394650e', 'hca_prod_fccd3f50cde247bf8972a293b5928aea__20220606_dcp2_20250514_dcp49'), - mksrc('bigquery', 'datarepo-7ae396a0', 'hca_prod_fde199d2a8414ed1aa65b9e0af8969b1__20220330_dcp2_20250514_dcp49'), -])) - -dcp50_sources = mkdict(dcp49_sources, 506, mkdelta([ - mksrc('bigquery', 'datarepo-80eb7b4b', 'hca_prod_2e40df1ed3c248b48a17c46e34d7be59__20250528_dcp2_20250528_dcp50'), - mksrc('bigquery', 'datarepo-109974d4', 'hca_prod_5bb1f67e2ff04848bbcf17d133f0fd2d__20220117_dcp2_20250528_dcp50'), - mksrc('bigquery', 'datarepo-b66195b1', 'hca_prod_6601b3d4ed5a4e1fb54ae36adf45aa28__20250528_dcp2_20250528_dcp50'), - mksrc('bigquery', 'datarepo-8c4fb6f2', 'hca_prod_9ff75ecece81483abb1f85e727f3805b__20250528_dcp2_20250528_dcp50'), - mksrc('bigquery', 'datarepo-7be28950', 'hca_prod_bba2f908e0cb4ee7b007f9c650e08be0__20250528_dcp2_20250528_dcp50'), - mksrc('bigquery', 
'datarepo-4c68d77c', 'hca_prod_e889ffc986f644f5b684dfce78e552a3__20250528_dcp2_20250528_dcp50'), -])) - -dcp51_sources = mkdict(dcp50_sources, 513, mkdelta([ - mksrc('bigquery', 'datarepo-42f1b9db', 'hca_prod_0d4aaaac02c344c48ae04465f97f83ed__20221101_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-96fb5b0d', 'hca_prod_16e9915978bc44aab47955a5e903bf50__20221101_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-2cf4df93', 'hca_prod_1c5eaabf075b4b7aa9e607792c2034b3__20231101_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-a1f67b41', 'hca_prod_1ffa222328a64133a5a4badd00faf4bc__20231101_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-642c1565', 'hca_prod_2043c65a1cf84828a6569e247d4e64f1__20220111_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-d6108285', 'hca_prod_2079bb2e676e4bbf8c68f9c6459edcbb__20240327_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-3f39fd30', 'hca_prod_2af52a1365cb4973b51339be38f2df3f__20220111_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-b67c1e0d', 'hca_prod_3d49e5e5976f44cbb6b9079016c31c56__20230614_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-75507299', 'hca_prod_4060444714e44e55ad221fd2d7eb4c68__20250702_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-d4a6add9', 'hca_prod_5bb1f67e2ff04848bbcf17d133f0fd2d__20220117_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-a2be998c', 'hca_prod_6137c3f4261f4192b32e4827a77ff793__20250702_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-fa80d73b', 'hca_prod_73011a86475548ac9f70a28903b4ad77__20230616_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-3e36296c', 'hca_prod_815c5ef50fb14eb798821d160362468e__20240806_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-8896b1cd', 'hca_prod_838d46603d624b08b32ddc5cbd93919d__20240531_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-4f60472d', 'hca_prod_8a666b76daaf4b1f9414e4807a1d1e8b__20220630_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-b80e6888', 'hca_prod_8dcbd84a62434501a6840dcd084bb536__20250702_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-ad8e8c60', 'hca_prod_94023a08611d4f22a8c990956e091b2e__20220118_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-9b90945d', 'hca_prod_9762d70c9b274f578cbc377b9b92ea9b__20241205_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-e7993e15', 'hca_prod_9c20a245f2c043ae82c92232ec6b594f__20220212_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-5d898bf1', 'hca_prod_9f17ed7d93254723a120b00e48db20c0__20230614_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-637990cf', 'hca_prod_9fc0064b84ce40a5a768e6eb3d364ee0__20220630_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-a2779004', 'hca_prod_b486e0d9dd8e462ab6629a5bbad5edae__20250702_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-1f843cbc', 'hca_prod_da74b50760ee4dd1bd02807bb051a337__20230614_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-06170818', 'hca_prod_dbd836cfbfc241f0983441cc6c0b235a__20220212_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-dd62c255', 'hca_prod_ea9f4ea7d7b341e7b23a43f95f569074__20250702_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-82f70d1a', 'hca_prod_f0f89c1474604bab9d4222228a91f185__20220119_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-799d515d', 'hca_prod_f598aee0d269403690e9d6d5b1c84429__20240903_dcp2_20250702_dcp51'), - mksrc('bigquery', 'datarepo-2e8b7553', 'hca_prod_fed95462342044fb8b9d2efbffb35479__20250702_dcp2_20250702_dcp51'), -])) - -dcp52_sources = mkdict(dcp51_sources, 518, mkdelta([ - mksrc('bigquery', 'datarepo-961a8ff8', 
'hca_prod_087efc3c26014de6bbe90114593050d1__20241004_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-2e9e61d9', 'hca_prod_21ea8ddb525f4f1fa82031f0360399a2__20220111_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-a33bf04b', 'hca_prod_29ed827bc5394f4cbb6bce8f9173dfb7__20221208_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-7220e6a0', 'hca_prod_35d5b0573daf4ccd8112196194598893__20240903_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-0b71589d', 'hca_prod_455b46e6d8ea4611861ede720a562ada__20220113_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-5f09b54e', 'hca_prod_4bcc16b57a4745bbb9c0be9d5336df2d__20240327_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-0f612c28', 'hca_prod_581de139461f4875b40856453a9082c7__20231212_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-5f99c04c', 'hca_prod_74b6d5693b1142efb6b1a0454522b4a0__20220117_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-e9388599', 'hca_prod_7be050259972493a856f3342a8d1b183__20220606_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-1cc22721', 'hca_prod_8c3c290ddfff4553886854ce45f4ba7f__20220118_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-1b940b9d', 'hca_prod_8fd1609bcd2d4b4dbb9649ae6b8ade2f__20250730_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-48cacb84', 'hca_prod_a9ad734654b443a990557512aa532ba0__20250730_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-18d25baa', 'hca_prod_aca93e287d874aa4b8ae498b9b235f46__20250730_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-f288d111', 'hca_prod_ad3c5c48ad2843fd9ec09b7a87e0ee6d__20250404_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-c1b1899c', 'hca_prod_ad98d3cd26fb4ee399c98a2ab085e737__20220118_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-2a6b7b79', 'hca_prod_b39381584e8d4fdb9e139e94270dde16__20241004_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-06f50646', 'hca_prod_c1810dbc16d245c3b45e3e675f88d87b__20220118_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-45a87f19', 'hca_prod_cea413af79b34f118b48383fe9a65fbe__20230614_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-540987b5', 'hca_prod_df8eb7ce370746afb823e081a562e954__20241205_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-767387a8', 'hca_prod_e0009214c0a04a7b96e2d6a83e966ce0__20220119_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-a5f23e97', 'hca_prod_e5ef5c5fb85647d1b64362c265528060__20250730_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-2a8c1e63', 'hca_prod_f8aa201c4ff145a4890e840d63459ca2__20220119_dcp2_20250801_dcp52'), - mksrc('bigquery', 'datarepo-c14d1f15', 'hca_prod_febdadddad3c4f4a820fade15c48545a__20250730_dcp2_20250801_dcp52'), -])) - -lungmap_sources = mkdict({}, 3, mkdelta([ - mksrc('bigquery', 'datarepo-32f75497', 'lungmap_prod_00f056f273ff43ac97ff69ca10e38c89__20220308_20220308'), - mksrc('bigquery', 'datarepo-7066459d', 'lungmap_prod_1bdcecde16be420888f478cd2133d11d__20220308_20220308'), - mksrc('bigquery', 'datarepo-cfaedae8', 'lungmap_prod_2620497955a349b28d2b53e0bdfcb176__20220308_20220308'), -])) - -lm2_sources = mkdict(lungmap_sources, 5, mkdelta([ - mksrc('bigquery', 'datarepo-5eee9956', 'lungmap_prod_00f056f273ff43ac97ff69ca10e38c89__20220308_20220314_lm2'), - mksrc('bigquery', 'datarepo-73453de6', 'lungmap_prod_20037472ea1d4ddb9cd356a11a6f0f76__20220307_20220310_lm2'), - mksrc('bigquery', 'datarepo-360d3b54', 'lungmap_prod_f899709cae2c4bb988f0131142e6c7ec__20220310_20220608_lm2'), -])) - -lm3_sources = mkdict(lm2_sources, 6, mkdelta([ - mksrc('bigquery', 'datarepo-d139f96d', 
'lungmap_prod_1bdcecde16be420888f478cd2133d11d__20220308_20230207_lm3'), - mksrc('bigquery', 'datarepo-0fdfdb69', 'lungmap_prod_6135382f487d4adb9cf84d6634125b68__20230207_20230314_lm3'), -])) - -lm4_sources = mkdict(lm3_sources, 7, mkdelta([ - mksrc('bigquery', 'datarepo-3d684ccd', 'lungmap_prod_00f056f273ff43ac97ff69ca10e38c89__20220308_20231207_lm4'), - mksrc('bigquery', 'datarepo-a65c8237', 'lungmap_prod_1bdcecde16be420888f478cd2133d11d__20220308_20231207_lm4'), - mksrc('bigquery', 'datarepo-43d1f2cb', 'lungmap_prod_20037472ea1d4ddb9cd356a11a6f0f76__20220307_20231207_lm4'), - mksrc('bigquery', 'datarepo-91587240', 'lungmap_prod_2620497955a349b28d2b53e0bdfcb176__20220308_20231207_lm4'), - mksrc('bigquery', 'datarepo-252f2a7d', 'lungmap_prod_4ae8c5c91520437198276935661f6c84__20231004_20231207_lm4'), - mksrc('bigquery', 'datarepo-e70d4665', 'lungmap_prod_6135382f487d4adb9cf84d6634125b68__20230207_20231207_lm4'), - mksrc('bigquery', 'datarepo-3f332829', 'lungmap_prod_f899709cae2c4bb988f0131142e6c7ec__20220310_20231207_lm4'), -])) - -lm6_sources = mkdict(lm4_sources, 8, mkdelta([ - mksrc('bigquery', 'datarepo-c3ad47d2', 'lungmap_prod_6511b041b11e4ccf85932b40148c437e__20240326_20240326_lm6'), -])) - -lm7_sources = mkdict(lm6_sources, 2, mkdelta([ - # @formatter:off - mksrc('bigquery', 'datarepo-3d684ccd', 'lungmap_prod_00f056f273ff43ac97ff69ca10e38c89__20220308_20231207_lm4', pop), # noqa E501 - mksrc('bigquery', 'datarepo-43d1f2cb', 'lungmap_prod_20037472ea1d4ddb9cd356a11a6f0f76__20220307_20231207_lm4', pop), # noqa E501 - mksrc('bigquery', 'datarepo-91587240', 'lungmap_prod_2620497955a349b28d2b53e0bdfcb176__20220308_20231207_lm4', pop), # noqa E501 - mksrc('bigquery', 'datarepo-252f2a7d', 'lungmap_prod_4ae8c5c91520437198276935661f6c84__20231004_20231207_lm4', pop), # noqa E501 - mksrc('bigquery', 'datarepo-c3ad47d2', 'lungmap_prod_6511b041b11e4ccf85932b40148c437e__20240326_20240326_lm6', pop), # noqa E501 - mksrc('bigquery', 'datarepo-3f332829', 'lungmap_prod_f899709cae2c4bb988f0131142e6c7ec__20220310_20231207_lm4', pop), # noqa E501 - # @formatter:on -])) - -lm8_sources = mkdict(lm7_sources, 12, mkdelta([ - mksrc('bigquery', 'datarepo-3f02598d', 'lungmap_prod_00f056f273ff43ac97ff69ca10e38c89__20220308_20250325_lm8'), - mksrc('bigquery', 'datarepo-2b15227b', 'lungmap_prod_1977dc4784144263a8706b0f207d8ab3__20240206_20241002_lm8'), - mksrc('bigquery', 'datarepo-c9158593', 'lungmap_prod_20037472ea1d4ddb9cd356a11a6f0f76__20220307_20241002_lm8'), - mksrc('bigquery', 'datarepo-2f143f27', 'lungmap_prod_2620497955a349b28d2b53e0bdfcb176__20220308_20250325_lm8'), - mksrc('bigquery', 'datarepo-35a6d7ca', 'lungmap_prod_3a02d15f9c6a4ef7852b4ddec733b70b__20241001_20241002_lm8'), - mksrc('bigquery', 'datarepo-131a1234', 'lungmap_prod_4ae8c5c91520437198276935661f6c84__20231004_20241002_lm8'), - mksrc('bigquery', 'datarepo-936db385', 'lungmap_prod_6135382f487d4adb9cf84d6634125b68__20230207_20241106_lm8'), - mksrc('bigquery', 'datarepo-42daf980', 'lungmap_prod_6511b041b11e4ccf85932b40148c437e__20240326_20250325_lm8'), - mksrc('bigquery', 'datarepo-3c4905d2', 'lungmap_prod_834e0d1671b64425a8ab022b5000961c__20241001_20241002_lm8'), - mksrc('bigquery', 'datarepo-d7447983', 'lungmap_prod_f899709cae2c4bb988f0131142e6c7ec__20220310_20241002_lm8'), - mksrc('bigquery', 'datarepo-c11ef363', 'lungmap_prod_fdadee7e209745d5bf81cc280bd8348e__20240206_20241002_lm8') -])) - -lm9_sources = mkdict(lm8_sources, 19, mkdelta([ - mksrc('bigquery', 'datarepo-6a2dd798', 
'lungmap_prod_0229ea32ef02489eb11eff15819e22c1__20250415_20250521_lm9'), - mksrc('bigquery', 'datarepo-1635e8d0', 'lungmap_prod_1977dc4784144263a8706b0f207d8ab3__20240206_20250521_lm9'), - mksrc('bigquery', 'datarepo-753403ae', 'lungmap_prod_1f00915122064294929a9c720f19a13f__20250415_20250521_lm9'), - mksrc('bigquery', 'datarepo-13f2c938', 'lungmap_prod_20037472ea1d4ddb9cd356a11a6f0f76__20220307_20250521_lm9'), - mksrc('bigquery', 'datarepo-aa49b92f', 'lungmap_prod_4ae8c5c91520437198276935661f6c84__20231004_20250521_lm9'), - mksrc('bigquery', 'datarepo-27a7736d', 'lungmap_prod_4fb3669262404146a7e654543543230c__20250415_20250521_lm9'), - mksrc('bigquery', 'datarepo-73797ea1', 'lungmap_prod_5134c97dbbdb4d9db483cdebeaf667b3__20250415_20250521_lm9'), - mksrc('bigquery', 'datarepo-4f6af8a7', 'lungmap_prod_ccea5717a8484c068d2dd1b694e89441__20250415_20250521_lm9'), - mksrc('bigquery', 'datarepo-9c6a9693', 'lungmap_prod_e651ac3e03aa46fa9ed370c6fda4e440__20250415_20250521_lm9'), - mksrc('bigquery', 'datarepo-35ee2bca', 'lungmap_prod_e68d2111316f4dedbf490ab332488665__20250415_20250521_lm9'), - mksrc('bigquery', 'datarepo-79fb8a00', 'lungmap_prod_f899709cae2c4bb988f0131142e6c7ec__20220310_20250521_lm9'), - mksrc('bigquery', 'datarepo-c10ea239', 'lungmap_prod_fdadee7e209745d5bf81cc280bd8348e__20240206_20250521_lm9'), -])) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - # Set variables for the `prod` (short for production) deployment here. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. Settings that are applicable to all environments but - # specific to you go into `environment.local.py` at the project root. 
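The docstring above fixes the contract: merge all relevant `environment.py` and `environment.local.py` files first, then resolve `{FOO}` references and drop `None` entries. A minimal sketch of a resolver honoring that contract; the name `resolve_env` and the fixed-point loop are assumptions, not the repository's actual implementation::

    from typing import Optional

    def resolve_env(merged: dict[str, Optional[str]]) -> dict[str, str]:
        # Entries with a None value document a variable without a default
        # and are excluded from the resulting environment.
        env = {k: v for k, v in merged.items() if v is not None}
        # Substitute `{FOO}` references repeatedly, so that references to
        # values that themselves contain references are also resolved.
        while True:
            resolved = {k: v.format_map(env) for k, v in env.items()}
            if resolved == env:
                return resolved
            env = resolved

In the sandbox file further below, for example, `AZUL_MONITORING_EMAIL` is set to `'{AZUL_OWNER}'`, which a resolver of this shape would rewrite to the owner's address after the merge.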
- - 'AZUL_DEPLOYMENT_STAGE': 'prod', - - 'AZUL_DOMAIN_NAME': 'azul.data.humancellatlas.org', - - 'AZUL_CATALOGS': base64.b64encode(bz2.compress(json.dumps({ - f'{catalog}{suffix}': dict(atlas=atlas, - internal=internal, - plugins=dict(metadata=dict(name='hca'), - repository=dict(name='tdr_hca')), - sources=mklist(sources)) - for atlas, catalog, sources in [ - ('hca', 'dcp51', dcp51_sources), - ('hca', 'dcp52', dcp52_sources), - ('lungmap', 'lm9', lm9_sources) - ] for suffix, internal in [ - ('', False), - ('-it', True) - ] - }).encode())).decode('ascii'), - - 'AZUL_TDR_SOURCE_LOCATION': 'US', - 'AZUL_TDR_SERVICE_URL': 'https://data.terra.bio', - 'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-prod.broadinstitute.org', - 'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-prod.broadinstitute.org', - - 'AZUL_ENABLE_MONITORING': '1', - - 'AZUL_ES_INSTANCE_TYPE': 'r6gd.xlarge.search', - 'AZUL_ES_INSTANCE_COUNT': '6', - - 'AZUL_CONTRIBUTION_CONCURRENCY': '300/64', - - 'AZUL_DEBUG': '1', - - 'AZUL_BILLING': 'hca', - - 'AZUL_OWNER': 'hannes@ucsc.edu', - - 'AZUL_MONITORING_EMAIL': 'azul-group@ucsc.edu', - - 'AZUL_AWS_ACCOUNT_ID': '542754589326', - 'AWS_DEFAULT_REGION': 'us-east-1', - - 'GOOGLE_PROJECT': 'platform-hca-prod', - - 'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '473200283737-h5e1l7neunbuesrtgjf8b12lb7o3jf1m.apps.googleusercontent.com', - - 'azul_slack_integration': json.dumps({ - 'workspace_id': 'T09P9H91S', # ucsc-gi.slack.com - 'channel_id': 'C04JWDFCPFZ' # #team-boardwalk-prod - }), - - 'AZUL_ENABLE_REPLICAS': '1', - - 'AZUL_ENABLE_VERBATIM_RELATIONS': '0', - - 'AZUL_ENABLE_MIRRORING': '1', - - 'AZUL_MIRROR_BUCKET': 'humancellatlas', - - 'AZUL_MIRRORING_CONCURRENCY': '128' - } diff --git a/deployments/sandbox/.example.environment.local.py b/deployments/sandbox/.example.environment.local.py deleted file mode 120000 index 45b1b10142..0000000000 --- a/deployments/sandbox/.example.environment.local.py +++ /dev/null @@ -1 +0,0 @@ -../dev/.example.environment.local.py \ No newline at end of file diff --git a/deployments/sandbox/environment.py b/deployments/sandbox/environment.py deleted file mode 100644 index 7d46df1219..0000000000 --- a/deployments/sandbox/environment.py +++ /dev/null @@ -1,283 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Literal, - Optional, -) - -is_sandbox = True - -pop = 2 # remove snapshot - - -def mksrc(source_type: Literal['bigquery', 'parquet'], - google_project, - snapshot, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - _, env, project, _ = snapshot.split('_', 3) - assert flags <= pop - source = None if flags & pop else ':'.join([ - 'tdr', - source_type, - 'gcp', - google_project, - snapshot, - prefix - ]) - return project, source - - -def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]: - result = dict(items) - assert len(items) == len(result), 'collisions detected' - assert list(result.keys()) == sorted(result.keys()), 'input not sorted' - return result - - -def mklist(catalog: dict[str, str]) -> list[str]: - return list(filter(None, catalog.values())) - - -def mkdict(previous_catalog: dict[str, str], - num_expected: int, - delta: dict[str, str] - ) -> dict[str, str]: - catalog = previous_catalog | delta - num_actual = len(mklist(catalog)) - assert num_expected == num_actual, (num_expected, num_actual) - return catalog - - -dcp2_sources = mkdict({}, 105, mkdelta([ - mksrc('bigquery', 'datarepo-dev-a9252919', 'hca_dev_005d611a14d54fbf846e571a1f874f70__20210827_20210903'), - 
mksrc('bigquery', 'datarepo-dev-c148d39c', 'hca_dev_027c51c60719469fa7f5640fe57cbece__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-e2ab8487', 'hca_dev_03c6fce7789e4e78a27a664d562bb738__20210902_20210907'), - mksrc('bigquery', 'datarepo-dev-37639c56', 'hca_dev_05657a599f9d4bb9b77b24be13aa5cea__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-9f4012c9', 'hca_dev_05be4f374506429bb112506444507d62__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-baa2812f', 'hca_dev_0792db3480474e62802c9177c9cd8e28__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-38e08b5c', 'hca_dev_08b794a0519c4516b184c583746254c5__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-2749da57', 'hca_dev_091cf39b01bc42e59437f419a66c8a45__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-eab7fa76', 'hca_dev_0c3b7785f74d40918616a68757e4c2a8__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-fef02a92', 'hca_dev_0d4b87ea6e9e456982e41343e0e3259f__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-78bae095', 'hca_dev_0fd8f91862d64b8bac354c53dd601f71__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-ff0a2fe7', 'hca_dev_116965f3f09447699d28ae675c1b569c__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-4de1b9fd', 'hca_dev_16ed4ad8731946b288596fe1c1d73a82__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-135f340c', 'hca_dev_1c6a960d52ac44eab728a59c7ab9dc8e__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-86c60513', 'hca_dev_1cd1f41ff81a486ba05b66ec60f81dcf__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-f5321179', 'hca_dev_1ce3b3dc02f244a896dad6d107b27a76__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-76de829d', 'hca_dev_2043c65a1cf84828a6569e247d4e64f1__20210831_20210907'), - mksrc('bigquery', 'datarepo-dev-c808badb', 'hca_dev_2086eb0510b9432bb7f0169ccc49d270__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-23782220', 'hca_dev_23587fb31a4a4f58ad74cc9a4cb4c254__20210827_20210909'), - mksrc('bigquery', 'datarepo-dev-4c3e6011', 'hca_dev_248fcf0316c64a41b6ccaad4d894ca42__20210907_20210907'), - mksrc('bigquery', 'datarepo-dev-1c2c69d9', 'hca_dev_24c654a5caa5440a8f02582921f2db4a__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-38f08cd8', 'hca_dev_2a64db431b554639aabb8dba0145689d__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-4cf05ce2', 'hca_dev_2a72a4e566b2405abb7c1e463e8febb0__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-3041c2cf', 'hca_dev_2af52a1365cb4973b51339be38f2df3f__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-311340f6', 'hca_dev_2d8460958a334f3c97d4585bafac13b4__20210902_20210907'), - mksrc('bigquery', 'datarepo-dev-766bfb76', 'hca_dev_2ef3655a973d4d699b4121fa4041eed7__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-1720b3c5', 'hca_dev_379ed69ebe0548bcaf5ea7fc589709bf__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-ac6efd3f', 'hca_dev_38449aea70b540db84b31e08f32efe34__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-40283c27', 'hca_dev_3a69470330844ece9abed935fd5f6748__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-b08233fa', 'hca_dev_3cfcdff5dee14a7ba591c09c6e850b11__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-bdc9f342', 'hca_dev_3e329187a9c448ec90e3cc45f7c2311c__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-ec07c8d8', 'hca_dev_4037007b0eff4e6db7bd8dd8eec80143__20210831_20210903'), - mksrc('bigquery', 'datarepo-dev-c0ec174a', 'hca_dev_403c3e7668144a2da5805dd5de38c7ff__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-31b3553a', 
'hca_dev_414accedeba0440fb721befbc5642bef__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-b4789901', 'hca_dev_41fb1734a121461695c73b732c9433c7__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-4e5ffd52', 'hca_dev_42d4f8d454224b78adaee7c3c2ef511c__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-5ef7f2e2', 'hca_dev_455b46e6d8ea4611861ede720a562ada__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-a6c6b953', 'hca_dev_4bec484dca7a47b48d488830e06ad6db__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-f31edbc2', 'hca_dev_4d6f6c962a8343d88fe10f53bffd4674__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-bb8fbae4', 'hca_dev_51f02950ee254f4b8d0759aa99bb3498__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-71de019e', 'hca_dev_520afa10f9d24e93ab7a26c4c863ce18__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-ffcf8b00', 'hca_dev_52b29aa4c8d642b4807ab35be94469ca__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-f76414c8', 'hca_dev_52d10a60c8d14d068a5eaf0d5c0d5034__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-319b80f7', 'hca_dev_577c946d6de54b55a854cd3fde40bff2__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-279f1986', 'hca_dev_5ee710d7e2d54fe2818d15f5e31dae32__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-0abea017', 'hca_dev_6072616c87944b208f52fb15992ea5a4__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-da221b1a', 'hca_dev_60ea42e1af4942f58164d641fdb696bc__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-c3d623dc', 'hca_dev_63b5b6c1bbcd487d8c2e0095150c1ecd__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-ecb9c129', 'hca_dev_67a3de0945b949c3a068ff4665daa50e__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-3545971c', 'hca_dev_71436067ac414acebe1b2fbcc2cb02fa__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-12b7a9e1', 'hca_dev_7880637a35a14047b422b5eac2a2a358__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-7913b094', 'hca_dev_78b2406dbff246fc8b6120690e602227__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-4747d8fe', 'hca_dev_7adede6a0ab745e69b67ffe7466bec1f__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-aba01389', 'hca_dev_7b947aa243a74082afff222a3e3a4635__20210831_20210907'), - mksrc('bigquery', 'datarepo-dev-bf3a4c8a', 'hca_dev_8185730f411340d39cc3929271784c2b__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-560ee3d1', 'hca_dev_83f5188e3bf749569544cea4f8997756__20210929_20211007'), - mksrc('bigquery', 'datarepo-dev-bd995e95', 'hca_dev_842605c7375a47c59e2ca71c2c00fcad__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-d0772077', 'hca_dev_8787c23889ef4636a57d3167e8b54a80__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-8eb2ffd1', 'hca_dev_87d52a86bdc7440cb84d170f7dc346d9__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-0c5c20b5', 'hca_dev_8c3c290ddfff4553886854ce45f4ba7f__20210902_20210907'), - mksrc('bigquery', 'datarepo-dev-29509483', 'hca_dev_90bd693340c048d48d76778c103bf545__20210827_20211110'), - mksrc('bigquery', 'datarepo-dev-59d37b9a', 'hca_dev_946c5add47d1402a97bba5af97e8bce7__20210831_20210903'), - mksrc('bigquery', 'datarepo-dev-788c3b52', 'hca_dev_955dfc2ca8c64d04aa4d907610545d11__20210831_20210903'), - mksrc('bigquery', 'datarepo-dev-4b88b45b', 'hca_dev_962bd805eb894c54bad2008e497d1307__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-02c59b72', 'hca_dev_99101928d9b14aafb759e97958ac7403__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-a6312a94', 'hca_dev_992aad5e7fab46d9a47ddf715e8cfd24__20210830_20210903'), - 
mksrc('bigquery', 'datarepo-dev-75589244', 'hca_dev_996120f9e84f409fa01e732ab58ca8b9__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-d4b988d6', 'hca_dev_a004b1501c364af69bbd070c06dbc17d__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-9ec7beb6', 'hca_dev_a29952d9925e40f48a1c274f118f1f51__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-d3d5bbfa', 'hca_dev_a39728aa70a04201b0a281b7badf3e71__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-7b7daff7', 'hca_dev_a96b71c078a742d188ce83c78925cfeb__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-58610528', 'hca_dev_a9c022b4c7714468b769cabcf9738de3__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-1dce87e5', 'hca_dev_ae71be1dddd84feb9bed24c3ddb6e1ad__20210916_20210916'), - mksrc('bigquery', 'datarepo-dev-b2004d1c', 'hca_dev_b4a7d12f6c2f40a39e359756997857e3__20210831_20210903'), - mksrc('bigquery', 'datarepo-dev-0b465564', 'hca_dev_b51f49b40d2e4cbdbbd504cd171fc2fa__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-cd97e83a', 'hca_dev_b963bd4b4bc14404842569d74bc636b8__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-376d3f4a', 'hca_dev_bd40033154b94fccbff66bb8b079ee1f__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-aa783adb', 'hca_dev_c5f4661568de4cf4bbc2a0ae10f08243__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-990234a8', 'hca_dev_c6ad8f9bd26a4811b2ba93d487978446__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-61b8d081', 'hca_dev_c715cd2fdc7c44a69cd5b6a6d9f075ae__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-71926fdc', 'hca_dev_c893cb575c9f4f26931221b85be84313__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-2f4bfe5d', 'hca_dev_ccd1f1ba74ce469b9fc9f6faea623358__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-aa8357fb', 'hca_dev_ccef38d7aa9240109621c4c7b1182647__20210831_20210903'), - mksrc('bigquery', 'datarepo-dev-24e672db', 'hca_dev_cddab57b68684be4806f395ed9dd635a__20210831_20210907'), - mksrc('bigquery', 'datarepo-dev-f4cb2365', 'hca_dev_ce33dde2382d448cb6acbfb424644f23__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-0d6f73ac', 'hca_dev_d012d4768f8c4ff389d6ebbe22c1b5c1__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-5674b4eb', 'hca_dev_d2111fac3fc44f429b6d32cd6a828267__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-b3632667', 'hca_dev_d3446f0c30f34a12b7c36af877c7bb2d__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-92c3a1de', 'hca_dev_d3a4ceac4d66498497042570c0647a56__20210831_20210903'), - mksrc('bigquery', 'datarepo-dev-e5bc6d76', 'hca_dev_d3ac7c1b53024804b611dad9f89c049d__20210827_20211015'), - mksrc('bigquery', 'datarepo-dev-dbc582d9', 'hca_dev_dbcd4b1d31bd4eb594e150e8706fa192__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-848e2d4f', 'hca_dev_dbd836cfbfc241f0983441cc6c0b235a__20210827_20210902'), - mksrc('bigquery', 'datarepo-dev-3b058b81', 'hca_dev_dc1a41f69e0942a6959e3be23db6da56__20210827_20220228_dcp14'), - mksrc('bigquery', 'datarepo-dev-27ad01e5', 'hca_dev_df88f39f01a84b5b92f43177d6c0f242__20210827_20210928'), - mksrc('bigquery', 'datarepo-dev-b839d6c7', 'hca_dev_e526d91dcf3a44cb80c5fd7676b55a1d__20210902_20210907'), - mksrc('bigquery', 'datarepo-dev-3faef568', 'hca_dev_e5d455791f5b48c3b568320d93e7ca72__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-e304a8fe', 'hca_dev_e77fed30959d4fadbc15a0a5a85c21d2__20210830_20210903'), - mksrc('bigquery', 'datarepo-dev-6fdac3db', 'hca_dev_e8808cc84ca0409680f2bba73600cba6__20210902_20210907'), - mksrc('bigquery', 'datarepo-dev-dbc3e131', 
'hca_dev_eaefa1b6dae14414953b17b0427d061e__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-b51e6694', 'hca_dev_f48e7c39cc6740559d79bc437892840c__20210830_20211007'), - mksrc('bigquery', 'datarepo-dev-10f0610a', 'hca_dev_f81efc039f564354aabb6ce819c3d414__20210827_20210903'), - mksrc('bigquery', 'datarepo-dev-24e9529e', 'hca_dev_f83165c5e2ea4d15a5cf33f3550bffde__20210901_20210908'), - mksrc('bigquery', 'datarepo-dev-67240cf2', 'hca_dev_f86f1ab41fbb4510ae353ffd752d4dfc__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-e8e0a59a', 'hca_dev_f8aa201c4ff145a4890e840d63459ca2__20210901_20210903'), - mksrc('bigquery', 'datarepo-dev-96d8e08c', 'hca_dev_faeedcb0e0464be7b1ad80a3eeabb066__20210831_20210903'), -])) - -dcp3_sources = mkdict(dcp2_sources, 104, mkdelta([ - mksrc('bigquery', 'datarepo-dev-3d6d24ad', 'hca_dev_05657a599f9d4bb9b77b24be13aa5cea__20210827_20230215'), - mksrc('bigquery', 'datarepo-dev-76de829d', 'hca_dev_2043c65a1cf84828a6569e247d4e64f1__20210831_20210907', pop), - mksrc('bigquery', 'datarepo-dev-b46086a9', 'hca_dev_51f02950ee254f4b8d0759aa99bb3498__20210827_20230215'), - mksrc('bigquery', 'datarepo-dev-27c50fbc', 'hca_dev_c5f4661568de4cf4bbc2a0ae10f08243__20210827_20230215'), - mksrc('bigquery', 'datarepo-dev-2a4ab485', 'hca_dev_d2111fac3fc44f429b6d32cd6a828267__20210830_20230215'), - mksrc('bigquery', 'datarepo-dev-1005632d', 'hca_dev_d3ac7c1b53024804b611dad9f89c049d__20210827_20230215'), - mksrc('bigquery', 'datarepo-dev-8709b362', 'hca_dev_df88f39f01a84b5b92f43177d6c0f242__20210827_20230215'), - mksrc('bigquery', 'datarepo-dev-e1712bfa', 'hca_dev_e526d91dcf3a44cb80c5fd7676b55a1d__20210902_20230215'), -])) - -lungmap_sources = mkdict({}, 2, mkdelta([ - mksrc('bigquery', 'datarepo-dev-5d9526e0', 'lungmap_dev_1bdcecde16be420888f478cd2133d11d__20220401_20220404'), - mksrc('bigquery', 'datarepo-dev-8de6d66b', 'lungmap_dev_2620497955a349b28d2b53e0bdfcb176__20220404_20220404') -])) - -lm2_sources = mkdict(lungmap_sources, 3, mkdelta([ - mksrc('bigquery', 'datarepo-dev-b47b6759', 'lungmap_dev_00f056f273ff43ac97ff69ca10e38c89__20220404_20220404_lm2'), -])) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - # Set variables for the `sandbox` deployment here. The sandbox is used - # to run integration tests against PRs and to perform CI/CD experiments. - # - # You can use this file as a template for a personal deployment. Look - # for conditionals using the `is_sandbox` variable and adjust the `else` - # branch accordingly. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. 
Settings that are applicable to all environments but - # specific to you go into `environment.local.py` at the project root. - - # When using this file as a template for a personal deployment, replace - # `None` with a short string that is specific to YOU. - # - 'AZUL_DEPLOYMENT_STAGE': 'sandbox' if is_sandbox else None, - - # This deployment uses a subdomain of the `dev` deployment's domain. - # - 'AZUL_DOMAIN_NAME': 'dev.singlecell.gi.ucsc.edu', - 'AZUL_SUBDOMAIN_TEMPLATE': '*.{AZUL_DEPLOYMENT_STAGE}', - 'AZUL_DRS_DOMAIN_NAME': 'drs.{AZUL_DEPLOYMENT_STAGE}.dev.singlecell.gi.ucsc.edu', - - 'AZUL_CATALOGS': json.dumps({ - f'{catalog}{suffix}': dict(atlas=atlas, - internal=internal, - plugins=dict(metadata=dict(name='hca'), - repository=dict(name='tdr_hca')), - sources=list(filter(None, sources.values()))) - for atlas, catalog, sources in [ - ('hca', 'dcp3', dcp3_sources), - ('lungmap', 'lungmap', lungmap_sources), - ('lungmap', 'lm2', lm2_sources) - ] - for suffix, internal in [ - ('', False), - ('-it', True) - ] - }), - - 'AZUL_TDR_SOURCE_LOCATION': 'us-central1', - 'AZUL_TDR_SERVICE_URL': 'https://jade.datarepo-dev.broadinstitute.org', - 'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-dev.broadinstitute.org', - 'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-dev.broadinstitute.org', - - **( - { - 'AZUL_ES_INSTANCE_TYPE': 'r6gd.large.search', - 'AZUL_ES_INSTANCE_COUNT': '2', - } if is_sandbox else { - # Personal deployments share an ES domain with `sandbox` - 'AZUL_SHARE_ES_DOMAIN': '1', - 'AZUL_ES_DOMAIN': 'azul-index-sandbox', - # Personal deployments use fewer Lambda invocations in parallel. - 'AZUL_CONTRIBUTION_CONCURRENCY': '8', - 'AZUL_AGGREGATION_CONCURRENCY': '8', - } - ), - - 'AZUL_DEBUG': '1', - - 'AZUL_BILLING': 'hca', - - # When using this file as a template for a personal deployment, change - # `None` to a string containing YOUR email address. - # - 'AZUL_OWNER': 'hannes@ucsc.edu' if is_sandbox else None, - - 'AZUL_MONITORING_EMAIL': '{AZUL_OWNER}', - - 'AZUL_AWS_ACCOUNT_ID': '122796619775', - 'AWS_DEFAULT_REGION': 'us-east-1', - - 'GOOGLE_PROJECT': 'platform-hca-dev', - - 'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '713613812354-3bj4m7vnsbco82bke96idvg8cpdv6r9r.apps.googleusercontent.com', - - 'AZUL_ENABLE_MIRRORING': '1', - } diff --git a/deployments/tempdev.browser/environment.py b/deployments/tempdev.browser/environment.py deleted file mode 100644 index 5841567270..0000000000 --- a/deployments/tempdev.browser/environment.py +++ /dev/null @@ -1,41 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. 
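The sandbox file above stores `AZUL_CATALOGS` as plain JSON, while the `prod` file earlier in this diff bz2-compresses the catalog JSON and base64-encodes the result, presumably to keep the value within environment-variable size limits. A consumer would simply invert that chain; a sketch, with the helper name `load_catalogs` assumed::

    import base64
    import bz2
    import json
    import os

    def load_catalogs() -> dict:
        # Invert the encoding used in deployments/prod/environment.py:
        # json.dumps -> bz2.compress -> base64.b64encode becomes
        # base64.b64decode -> bz2.decompress -> json.loads.
        raw = os.environ['AZUL_CATALOGS']
        return json.loads(bz2.decompress(base64.b64decode(raw)))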
- """ - return { - 'azul_terraform_component': 'browser', - 'azul_browser_sites': json.dumps({ - 'browser': { - 'zone': '{AZUL_DOMAIN_NAME}', - 'domain': '{AZUL_DOMAIN_NAME}', - 'project': 'ucsc/data-browser', - 'branch': 'ucsc/anvil/tempdev', - 'tarball_name': 'anvil', - 'tarball_path': 'out', - 'real_path': '' - } - }) - } diff --git a/deployments/tempdev.gitlab/environment.py b/deployments/tempdev.gitlab/environment.py deleted file mode 100644 index edb1f5e066..0000000000 --- a/deployments/tempdev.gitlab/environment.py +++ /dev/null @@ -1,31 +0,0 @@ -from collections.abc import ( - Mapping, -) -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - 'azul_terraform_component': 'gitlab', - 'azul_vpc_cidr': '172.25.0.0/16', - 'azul_vpn_subnet': '10.46.0.0/16' - } diff --git a/deployments/tempdev.shared/environment.py b/deployments/tempdev.shared/environment.py deleted file mode 100644 index 5705e22099..0000000000 --- a/deployments/tempdev.shared/environment.py +++ /dev/null @@ -1,38 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Optional, -) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. 
- """ - return { - 'azul_terraform_component': 'shared', - 'azul_aws_support_roles': json.dumps(['administrator', 'developer']), - - 'azul_security_contact': json.dumps({ - 'name': 'Hannes Schmidt', - 'title': 'Tech lead', - 'email_address': 'azul-group@ucsc.edu', - 'phone_number': '831-454-8200' - }), - } diff --git a/deployments/tempdev/.example.environment.local.py b/deployments/tempdev/.example.environment.local.py deleted file mode 120000 index 3d88e15943..0000000000 --- a/deployments/tempdev/.example.environment.local.py +++ /dev/null @@ -1 +0,0 @@ -../anvildev/.example.environment.local.py \ No newline at end of file diff --git a/deployments/tempdev/environment.py b/deployments/tempdev/environment.py deleted file mode 100644 index d30adcc701..0000000000 --- a/deployments/tempdev/environment.py +++ /dev/null @@ -1,138 +0,0 @@ -from collections.abc import ( - Mapping, -) -import json -from typing import ( - Literal, - Optional, -) - -pop = 1 # remove snapshot - - -def mksrc(source_type: Literal['bigquery', 'parquet'], - google_project, - snapshot, - flags: int = 0, - /, - prefix: str = '' - ) -> tuple[str, str | None]: - project = '_'.join(snapshot.split('_')[1:-3]) - assert flags <= pop - source = None if flags & pop else ':'.join([ - 'tdr', - source_type, - 'gcp', - google_project, - snapshot, - prefix - ]) - return project, source - - -def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]: - result = dict(items) - assert len(items) == len(result), 'collisions detected' - assert list(result.keys()) == sorted(result.keys()), 'input not sorted' - return result - - -def mklist(catalog: dict[str, str]) -> list[str]: - return list(filter(None, catalog.values())) - - -def mkdict(previous_catalog: dict[str, str], - num_expected: int, - delta: dict[str, str] - ) -> dict[str, str]: - catalog = previous_catalog | delta - num_actual = len(mklist(catalog)) - assert num_expected == num_actual, (num_expected, num_actual) - return catalog - - -anvil_sources = mkdict({}, 3, mkdelta([ - mksrc('bigquery', 'datarepo-dev-e53e74aa', 'ANVIL_1000G_2019_Dev_20230609_ANV5_202306121732'), - mksrc('bigquery', 'datarepo-dev-42c70e6a', 'ANVIL_CCDG_Sample_1_20230228_ANV5_202302281520'), - mksrc('bigquery', 'datarepo-dev-97ad270b', 'ANVIL_CMG_Sample_1_20230225_ANV5_202302281509') -])) - - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - # Set variables for the `anvildev` (short for AnVIL development) - # deployment here. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. 
- - -def env() -> Mapping[str, Optional[str]]: - """ - Returns a dictionary that maps environment variable names to values. The - values are either None or strings. String values can contain references to - other environment variables in the form `{FOO}` where FOO is the name of an - environment variable. See - - https://docs.python.org/3.12/library/string.html#format-string-syntax - - for the concrete syntax. These references will be resolved *after* the - overall environment has been compiled by merging all relevant - `environment.py` and `environment.local.py` files. - - Entries with a `None` value will be excluded from the environment. They - can be used to document a variable without a default value in which case - other, more specific `environment.py` or `environment.local.py` files must - provide the value. - """ - return { - # Set variables for the `tempdev` deployment here. - # - # Only modify this file if you intend to commit those changes. To apply - # a setting that's specific to you AND the deployment, create an - # `environment.local.py` file right next to this one and apply that - # setting there. Settings that are applicable to all environments but - # specific to you go into `environment.local.py` at the project root. - - 'AZUL_DEPLOYMENT_STAGE': 'tempdev', - - 'AZUL_DOMAIN_NAME': 'temp.gi.ucsc.edu', - - 'AZUL_S3_BUCKET': 'edu-ucsc-gi-platform-temp-dev-storage-{AZUL_DEPLOYMENT_STAGE}.{AWS_DEFAULT_REGION}', - - 'AZUL_CATALOGS': json.dumps({ - f'{catalog}{suffix}': dict(atlas=atlas, - internal=internal, - plugins=dict(metadata=dict(name='anvil'), - repository=dict(name='tdr_anvil')), - sources=list(filter(None, sources.values()))) - for atlas, catalog, sources in [ - ('anvil', 'anvil', anvil_sources), - ] - for suffix, internal in [ - ('', False), - ('-it', True) - ] - }), - - 'AZUL_TDR_SOURCE_LOCATION': 'us-central1', - 'AZUL_TDR_SERVICE_URL': 'https://jade.datarepo-dev.broadinstitute.org', - 'AZUL_SAM_SERVICE_URL': 'https://sam.dsde-dev.broadinstitute.org', - 'AZUL_DUOS_SERVICE_URL': 'https://consent.dsde-dev.broadinstitute.org', - 'AZUL_TERRA_SERVICE_URL': 'https://firecloud-orchestration.dsde-dev.broadinstitute.org', - - 'AZUL_ENABLE_MONITORING': '1', - - # $0.191/h × 2 × 24h/d × 30d/mo = $275.04/mo - 'AZUL_ES_INSTANCE_TYPE': 'r6gd.large.search', - 'AZUL_ES_INSTANCE_COUNT': '2', - - 'AZUL_DEBUG': '1', - - 'AZUL_BILLING': 'anvil', - - 'AZUL_OWNER': 'hannes@ucsc.edu', - - 'AZUL_MONITORING_EMAIL': 'azul-group@ucsc.edu', - - 'AZUL_AWS_ACCOUNT_ID': '654654270592', - 'AWS_DEFAULT_REGION': 'us-east-1', - - 'GOOGLE_PROJECT': 'platform-temp-dev', - - 'AZUL_DEPLOYMENT_INCARNATION': '1', - - 'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '807674395527-erth0gf1m7qme5pe6bu384vpdfjh06dg.apps.googleusercontent.com', - } diff --git a/docker_images.json b/docker_images.json deleted file mode 100644 index 0c16abf732..0000000000 --- a/docker_images.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "docker.io/library/docker:28.3.3": { - "digest": "sha256:c0872aae4791ff427e6eda52769afa04f17b5cf756f8267e0d52774c99d5c9de", - "mirror_digest": "sha256:8540613af2771eff7cc76188d1e2071e5800efcc17b2b640e4f2dd84d5ddd11d", - "parts": { - "linux/amd64": { - "digest": "sha256:acf2e2d09cedf21fa8f27bb0962674e33e159c744c152b248f1f7f43623ccd82", - "id": "sha256:b2a8bf985f76be7345c0c827c568db287ccb4dfad919a4693488224e3644fb0c", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:88bf2ba4673e2963049952ad60d46389e6d2be3d31f0f4d1d69c55e0712d22a4", - "id": "sha256:e0680e652cec44e947fd5401c80526cad1e184420cc2645f6399d102127e5bbf", - "platform": "linux/arm64" - } - } - }, - "docker.io/library/python:3.12.11-slim-bookworm": { - "digest": "sha256:3a4908d9f85cf155ef5847883c247e420d7f9d2686027f0575ac44c2b6db9782", - "mirror_digest": "sha256:7ad2683bddd96dd1cdd2fa74c17a06d00a4df93a63c450bc1b51190e7a8dd7bd", - "parts": { - "linux/amd64": { - "digest": "sha256:4084f1a51b5bb5f4cbf6ae741f72dd9b9d6d11a398830ec6550f5b43e5dfb3aa", - "id": "sha256:78b97569762e7aa9cadd22d089be7ac6ca075be195b3e1b910c9379e414cbe82", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:5dcd6be9c9367c5d394e1a7cc739901b056a6947017a146aded3ba99daa2f85d", - "id": "sha256:0791981c53ac9ef47abb38ea94f6d74a9e9e6781a0f5b4302d98c598d2626fc9", - "platform": "linux/arm64" - } - } - }, - "docker.io/ucscgi/azul-pycharm:2024.3.6-59": { - "digest": "sha256:1077be5c5a440a1cd1392e2b7e046911c2fb54c6b3aeab952ad8787213e572c6", - "mirror_digest": "sha256:f7a9816d85760968b91383786c0edecc5cd6932f1508d72f0077bee04a351402", - "parts": { - "linux/amd64": { - "digest": "sha256:4234d37768aa6ec02b6ed7682d61fbdec8e28795c7ca0f12763b120cb2e1f228", - "id":
"sha256:16bb6b7c0ccdf72259c8aab94e1d7a46218ef7cf27508e9dea1d13b14f30c7fd", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:ed064994264eb388d2ea509025133ab415788d42def75b7f5406e76749f29cbd", - "id": "sha256:5bd1243a2afe36d5762ed9bae78fdf2ff5a7a427ee2834831a47e7dbadf8e192", - "platform": "linux/arm64" - } - } - }, - "docker.io/opensearchproject/opensearch:2.19.3": { - "digest": "sha256:a54d2deebe965848afc0d06f5088baa0fbf77fedb3a2c62de9e032eb6d6aaf7f", - "mirror_digest": "sha256:51ad0959df798111b5513b056d9a6b83898092a0854da05ec3027f108a7696e1", - "parts": { - "linux/amd64": { - "digest": "sha256:9c21f88114b185897a88cfd69cb5f02bc3c30502b195fa573af158565a36b32c", - "id": "sha256:b9713a137fd77d837349b41830c8aaba72904afbbcaa5f79dd0f7a74f66b31e6", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:b1e8187a62da244c0bca4c7724d02baf0ccc13c16dacb209f34b854d381097f2", - "id": "sha256:857be6bf6afae22a2baa8976ceaef2924cee2f9ef82924be19cb2ae6444aa7a7", - "platform": "linux/arm64" - } - } - }, - "docker.io/ucscgi/azul-bigquery-emulator:0.4.4-41": { - "digest": "sha256:d504cc667ab5426d81079c97a8233c8f4e8a0e81e83c6f6380517381788f9d97", - "mirror_digest": "sha256:276d06ad268830bf55b279ffd58d35e6df493b314c8e8f636c89b4079d202269", - "parts": { - "linux/amd64": { - "digest": "sha256:9f00b8e64a0fafa964c34d373899828f7abb736374a1cab849be01c64c85ce71", - "id": "sha256:186c565ee10f3301f23988d689ffc8d23ad2e7e8c96e18414616215a51e25fac", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:d2f16251ae22e2fe9f7aa3e0e59cf26aefe3396c98b104043f6b169126d87532", - "id": "sha256:e2af0ee37403e05d7035cdb6f9381ca088ee2b97062aca7c175b5def3094e515", - "platform": "linux/arm64" - } - } - }, - "docker.io/clamav/clamav:1.4.3-51": { - "digest": "sha256:eeef3abc9720c350d552f650ca2f1c3a987ebd8a531019aea1effc6f38d68861", - "mirror_digest": "sha256:58bf974f44e3231c0d18ea6cc72d34f9fc22153028e328b12e0abe139ed28835", - "parts": { - "linux/amd64": { - "digest": "sha256:1179cfb8bd9f7d497ecd86df4d8d88c165300300c1c3ac96db2f9c3d3a6ad7dc", - "id": "sha256:73d4b29c677cb5bc06fe87ef6973abdb7b1ffd1ea1390003716e286990b97bb7", - "platform": "linux/amd64" - } - } - }, - "docker.io/gitlab/gitlab-ce:18.2.2-ce.0": { - "digest": "sha256:ae39a4eb025f8138ecf63dda1dbdd3d4759e8f3cea86beb80e5a80b355dfe708", - "mirror_digest": "sha256:5a23d4addec672e0b68ec28ac6ce3e2db36e7e5398b91c1dff1f468fba861e42", - "parts": { - "linux/amd64": { - "digest": "sha256:104ad65c84276deeb8ed04cc35c29b327f41b6590b3f4a43c9292c92f7c9c16f", - "id": "sha256:8aad4c87c3d469f57b912c4ce3ab2e2a8b7277d5ba2e76fe2e2134f85e4f6045", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:32d48d34af09070e4fe8479683d6280c5fd6711890e633ac94fada4035e3b756", - "id": "sha256:0907db568bd88ab8550b6d912aba75fdd20be2a937367b77837a621682f609d8", - "platform": "linux/arm64" - } - } - }, - "docker.io/gitlab/gitlab-runner:ubuntu-v18.2.1": { - "digest": "sha256:e37142793d94abb8e66258d43d622b9d132bbcc1c3a6b4f9cea7dd0e8d839996", - "mirror_digest": "sha256:bd557b08032cc5582d2cd97bbb058c4dc3f82e896b63d7792484f161d3fc1623", - "parts": { - "linux/amd64": { - "digest": "sha256:98271bd866bc798dc2e11ffdf2e838d0e7e1837e42b350e7c606d565cf0dfdd8", - "id": "sha256:fbd362edd5dc642642108998a2f362ec03fa380dfb8ce609c46be28abb5ac227", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:a5f367f0964deadd2fe26b115e5eeabcd0674a58110bc92c2e211aecd7a9cb02", - "id": "sha256:2f49053b767c803d37c3fc8d042eb335761d7903eeb1d6c834604e0d1eb7f8a8", - 
"platform": "linux/arm64" - } - } - }, - "docker.io/library/docker:28.3.3-dind": { - "digest": "sha256:c0872aae4791ff427e6eda52769afa04f17b5cf756f8267e0d52774c99d5c9de", - "mirror_digest": "sha256:8540613af2771eff7cc76188d1e2071e5800efcc17b2b640e4f2dd84d5ddd11d", - "parts": { - "linux/amd64": { - "digest": "sha256:acf2e2d09cedf21fa8f27bb0962674e33e159c744c152b248f1f7f43623ccd82", - "id": "sha256:b2a8bf985f76be7345c0c827c568db287ccb4dfad919a4693488224e3644fb0c", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:88bf2ba4673e2963049952ad60d46389e6d2be3d31f0f4d1d69c55e0712d22a4", - "id": "sha256:e0680e652cec44e947fd5401c80526cad1e184420cc2645f6399d102127e5bbf", - "platform": "linux/arm64" - } - } - }, - "docker.io/cllunsford/aws-signing-proxy:0.2.2": { - "digest": "sha256:928281593b8b69bc6608d113c6bcdd5c291216a6f3596403cb89309e4de728aa", - "id": "sha256:6cfb415b5f85e3522b4fd21c692a8343396f07db17e4056e21af94719cddf734", - "platform": "linux/amd64" - }, - "docker.io/lmenezes/cerebro:0.9.4": { - "digest": "sha256:7d9e2b77e459e55b4f8173f7a2fbb5257d88d5af829577266c5ddf2953fe82ec", - "id": "sha256:617b8f87c98a03ec4efffbca9534dd36dcdbf68235f775a78c6599a96c0bdb40", - "platform": "linux/amd64" - }, - "docker.io/opensearchproject/opensearch-dashboards:2.19.3": { - "digest": "sha256:a6b92ab4ccdb71a056347c1ca1022c4266e2cfd20fc0a74a6687433cb258ae71", - "mirror_digest": "sha256:0654300473c9f3b3e6dccbbf906eafb996293bcf9b42208fcf353cfb19a357fb", - "parts": { - "linux/amd64": { - "digest": "sha256:b51521e7ff99d6dbdc160835a2fbeb90465bfc58f92f3aa4387ecd56ae051d2f", - "id": "sha256:1534f70025563f31d9dea585dae5aeb508e7fbd24b4d40672bdab61f1351b094", - "platform": "linux/amd64" - }, - "linux/arm64": { - "digest": "sha256:4de79bb89763da9dd068335c8f57d6190dcc4e97b43d6440bab21a495a630e55", - "id": "sha256:cb69221a8c12e29464f69660ec60e4776d4b03fb0a8a9acc3b80f8fd4d312086", - "platform": "linux/arm64" - } - } - } -} \ No newline at end of file diff --git a/docs/azul-arch.svg b/docs/azul-arch.svg deleted file mode 100644 index a4bd471989..0000000000 --- a/docs/azul-arch.svg +++ /dev/null @@ -1,1744 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - Azul Architecture - - Layer 1 - - - - - Tallies Queue (FIFO, one message group per entity) - - - - - files_123 - - - - - projects_42 - - - - - samples_71 - - - - - files_98 - - - - - Data Store - - - - - Notifications Queue - - - - - ContributionsIndex - - - - - AggregateIndex - - - Rectangle : Violet - - - Indexer REST API - - - Rectangle : Blue - - - Notificationb1 - - - Rectangle : Blue - - - b1 - - - - - - - - - Rectangle : Blue - - - b2 - - - Rectangle : Green - - - Indexer Workers (64) - - - - - - Rectangle : Blue - - - Bundleb2 - - - - - - - SQS Push - - - Rectangle : Blue - - - 1 - - - Rectangle : Blue - - - Contributionfiles_123_b1 - - - Rectangle : Yellow - - - Agregator Workers (64) - - - Rectangle : Blue - - - 1 - - - Rectangle : Blue - - - 2 - - - Rectangle : Blue - - - 1 - - - Rectangle : Blue - - - 2 - - - - - - - - - Rectangle : Blue - - - Contributionprojects_42_b1 - - - Rectangle : Blue - - - Contributionsamples_71_b1 - - - - - - Rectangle : Blue - - - Aggregatefiles_98 - - - Rectangle : Blue - - - Aggregateprojects_42 - - - Rectangle : Blue - - - Aggregatesamples_71 - - - Rectangle : Blue - - - Contributionfiles_98_b2 - - - Rectangle : Blue - - - Contributionprojects_42_b2 - - - Rectangle : Blue - - - Contributionsamples_71_b2 - - - - - - Rectangle : Blue - 
- - Notificationb2 - - - - HTTP - - - Rectangle : Blue - - - Bundleb1 - - - - - - Rectangle : Blue - - - Aggregatefiles_123 - - - Rectangle : Violet - - - Service REST API - - - - - - - - - - - - - - - - EmulatedSQS Push - - - - Deferment - - - Rectangle : Blue - - - 1 - - - Rectangle : Blue - - - 1 - - - Rectangle : Blue - - - 1 - - - - - - diff --git a/docs/compliance/change_management.rst b/docs/compliance/change_management.rst deleted file mode 100644 index 9c45af2757..0000000000 --- a/docs/compliance/change_management.rst +++ /dev/null @@ -1,263 +0,0 @@ -Software development policy & procedures -**************************************** - - -Introduction -============ - -Changes to the system are implemented through a workflow process that involves -multiple team members and covers a range of steps including the -conceptualization, prioritization, implementation, testing, deployment, and -verification of changes on the production environment. Multiple software -products and services are utilized during this process including `git`_ for -version control, `GitHub`_ for issue tracking, pull request (PR) management and -continuous integration (CI), `Terraform`_ for deployment of infrastructure as -code (IaC), `GitLab`_ for CI/CD, and the cloud providers `AWS`_ and `Google -Cloud`_ for cloud resources. - -.. _git: https://git-scm.com/ -.. _GitHub: https://github.com/ -.. _Terraform: https://www.terraform.io/ -.. _GitLab: https://about.gitlab.com/ -.. _AWS: https://aws.amazon.com/ -.. _Google Cloud: https://cloud.google.com/ - -During this process of managing changes, several separate `deployments`_ are -used. The `dev` deployment receives changes that were merged into the `develop` -branch, and allows verification in a scaled down production-like environment -prior to changes being merged into the `prod` branch and deployed to the `prod` -deployment. For the `AnVIL project`_, this setup is duplicated with both an -`anvildev` and `anvilprod` deployment. The two dev deployments `dev` and -`anvildev` are updated together from the same `develop` branch, however they -differ from each other in their deployment configuration which specifies the set -of TDR snapshots the deployment will index, and which metadata and repository -plugins the deployment will use. Similar to the dev deployments, the two prod -deployments `prod` and `anvilprod` are kept in sync with updates from the `prod` -branch. - -.. _deployments: https://docs.google.com/document/d/1Kg0dMZmCw6gtkvabD2jYWPZO2Mx_wsC8BJPKdeKTfg0/edit#bookmark=id.3zefi1arki7p -.. _AnVIL project: https://anvilproject.org/ - - -Issue Management -================ - -All change requests, including requests for new features, changes to existing -features, or changes that address defects, start as GitHub issues in one of the -GitHub repositories used by the system. The GitHub issue includes a description -of the desired change and, if the issue constitutes a defect, the steps needed -to reproduce it. - -The `project manager`_ and `system administrator`_ triage the GitHub issue, -determine its priority relative to existing issues, and assign it to a developer -for implementation. - -.. _project manager: https://docs.google.com/document/d/1Kg0dMZmCw6gtkvabD2jYWPZO2Mx_wsC8BJPKdeKTfg0/edit#heading=h.jk936f4i59y8 -.. 
_system administrator: https://docs.google.com/document/d/1Kg0dMZmCw6gtkvabD2jYWPZO2Mx_wsC8BJPKdeKTfg0/edit#heading=h.o3qbvwbucpqo - - -Code Development & Peer Review -============================== - -When working on a GitHub issue, the assigned developer will create a feature -branch using the latest commit from the `develop` branch as a base. The -`develop` branch contains the latest development code, and is updated by the -`operator`_ through the merging of tested, reviewed, and approved feature -branches. - -.. _operator: https://docs.google.com/document/d/1Kg0dMZmCw6gtkvabD2jYWPZO2Mx_wsC8BJPKdeKTfg0/edit#heading=h.1rxjx57g24fq - -Following the guidance provided in the contribution guide (`CONTRIBUTING.rst`_) -and project README (`README.md`_), the developer implements the requested change -in the feature branch and deploys the branch to their personal deployment for -testing. A developer's personal deployment is similar in configuration to the -`dev` deployment, with the main difference being the scale of the infrastructure -(such as the size of the ElasticSearch domain) and the set of snapshots (frozen -sets of metadata documents) configured to be indexed on each deployment. - -.. _CONTRIBUTING.rst: https://github.com/DataBiosphere/azul/blob/develop/CONTRIBUTING.rst -.. _README.md: https://github.com/DataBiosphere/azul/blob/develop/README.md - -When the developer’s feature branch is ready, it is pushed to GitHub where a PR -is created and connected to the respective GitHub issue. GitHub performs CI -checks against the branch including running unit tests, checking for -vulnerabilities with `CodeQL`_, checking test coverage with `CodeCov`_ & -`Coveralls`_, and security scanning with `Snyk`_. The unit tests have no -dependencies on the cloud infrastructure in any deployments within the system. -If a unit test covers code that relies on a cloud resource, that resource is -mocked by the test. Only PRs from developers (team members) kick off unit tests. -This is because running unit tests consumes resources and requires credentials -for uploading test coverage results to CodeCov & Coveralls. - -.. _CodeQL: https://codeql.github.com/ -.. _CodeCov: https://about.codecov.io/ -.. _Coveralls: https://github.com/marketplace/coveralls -.. _Snyk: https://snyk.io/ - -The developer then follows the `checklist`_ included in every PR to ensure that -the PR has been properly set up and is ready for review. When ready, the -developer requests a review from a peer. If the peer has review feedback and/or -requests changes, ownership of the PR goes back to the developer for updates and -the review process repeats. - -.. _checklist: https://github.com/DataBiosphere/azul/blob/develop/.github/pull_request_template.md - - -Change Approval -=============== - -Once the PR is approved by the peer, a review is requested from the system -administrator. If the system administrator has review feedback and/or requests -changes, ownership of the PR goes back to the developer for updates. After -completing the updates, the developer requests another review from the system -administrator and the review process is repeated until the PR is approved. When -approving a PR, the system administrator decides what procedures (if any) are -needed to demonstrate the resolution of the issue, adds these demo expectations -to the GitHub issue (or marks the issue “no demo”), approves the PR, and assigns -the PR to the operator for further validation and merging the PR's feature -branch into the `develop` branch. 
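A minimal sketch of the mocking approach described under Code Development & Peer Review above, assuming a hypothetical function that sums object sizes in an S3 bucket; the patched `boto3` client stands in for the cloud resource, so the test needs neither credentials nor any deployed infrastructure::

    import unittest
    from unittest import mock

    import boto3


    def bucket_size(bucket: str) -> int:
        # Hypothetical code under test; it depends on a cloud resource
        s3 = boto3.client('s3')
        response = s3.list_objects_v2(Bucket=bucket)
        return sum(o['Size'] for o in response['Contents'])


    class TestBucketSize(unittest.TestCase):

        @mock.patch('boto3.client')
        def test_bucket_size(self, mock_client):
            # The mock replaces the real S3 client for the duration of
            # the test, so no cloud infrastructure is touched
            mock_client.return_value.list_objects_v2.return_value = {
                'Contents': [{'Size': 10}, {'Size': 32}]
            }
            self.assertEqual(42, bucket_size('some-bucket'))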
- - -Deployment to dev environment -============================= - -To facilitate CI/CD to the various deployments, multiple separate GitLab -instances are used. One GitLab instance is used to manage both the `dev` and -`sandbox` deployments, and another GitLab instance is used solely for the `prod` -deployment. This setup is mirrored for the AnVIL project, with one GitLab -instance to manage both the `anvildev` and `anvilbox` deployments, and another -GitLab instance for the `anvilprod` deployment. - -The `sandbox` deployment is similar in configuration to the `dev` deployment, -the main difference being the scale of the infrastructure. The `sandbox` and -`dev` deployments share the same set of snapshots, although `sandbox` only -indexes a subset of each snapshot. The operator follows the PR checklist to -validate the feature branch in the `sandbox` and `anvilbox` deployments prior to -merging into the `develop` branch. The exception to this is when a PR is labeled -`no sandbox`, which indicates that the system administrator has deemed it not -necessary to test the PR in the sandbox, for instance when the change is -specific only to the `dev` deployment. - -This process of testing a PR in the `sandbox` deployment starts with the feature -branch being rebased on the latest commit in `develop` and the squashing of any -fixup commits. The operator then pushes the feature branch to GitHub, followed -by `GitLab dev`_ and `GitLab anvildev`_. On GitLab, the branch is run through a -CI/CD pipeline to build, test, deploy the branch to the `sandbox` and `anvilbox` -deployments, and run integration tests against the deployments. The difference -between unit and integration tests is that unit tests are specific to individual -components of the system and will mock components that are outside the focus of -the test, while integration tests follow a holistic approach to verify the -interconnection between the components of the system as a whole. - -.. _GitLab dev: https://gitlab.dev.singlecell.gi.ucsc.edu/ -.. _GitLab anvildev: https://gitlab.anvil.gi.ucsc.edu/ - -A reindex is performed on the `sandbox` deployment if the feature branch -includes an update to the set of snapshots indexed by the deployment or changes -the behavior of the indexer in a way that affects the shape of documents in the -ElasticSearch index. - -Some PRs require the operator to perform special procedures beyond the standard -deploy/test/reindex cycle. Common examples of this include deploying to the -`shared` components (which manage infrastructure shared between deployments in -the same AWS account, e.g., `dev.shared` and `prod.shared`) and updating the -GitLab instances. These special procedures are referred to as upgrading -instructions and are cumulatively documented in `UPGRADING.rst`_. - -.. _UPGRADING.rst: https://github.com/DataBiosphere/azul/blob/develop/UPGRADING.rst - -After the CI/CD pipeline in GitLab completes without error, the operator merges -the feature branch into `develop`. The operator then pushes the updated -`develop` branch to GitHub, followed by GitLab `dev` and `anvildev`. On GitLab, -the `develop` branch is run through a CI/CD pipeline again to build, test, -deploy the merged changes, but this time to the `dev` (or `anvildev`) -deployment, and run integration tests against that deployment. A reindex is -performed on the deployment if the feature branch includes an update to the set -of snapshots indexed by the deployment or changes the behavior of the indexer. 
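For contrast with the mocked unit test sketched above, a minimal sketch of an integration test; the health endpoint, its URL, and the response shape are hypothetical. Nothing is mocked, so the test verifies the deployed components working in concert::

    import json
    import unittest
    import urllib.request


    class TestServiceHealth(unittest.TestCase):

        def test_health(self):
            # Hypothetical health endpoint on the sandbox deployment
            url = 'https://service.sandbox.example.org/health'
            with urllib.request.urlopen(url) as response:
                self.assertEqual(200, response.status)
                health = json.loads(response.read())
                self.assertTrue(health['up'])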
- -Deployment to production environment -==================================== - -Once a week, the system administrator and operator review the recent changes to -the `develop` branch and decide which changes are ready to be promoted to the -`prod` and `anvilprod` deployments. The decision as to which changes to include -in a promotion considers a number of factors: for one, changes should usually -mature on the `develop` branch for one week before being promoted to `prod`, so -that they can be validated interactively and more subtle defects like memory -leaks have time to emerge. If the changes affect a REST API in a way that -requires changes to the UI code, a second PR must add those changes to the UI -component. Only after both PRs have been deployed to `dev` can they be promoted -to `prod`. The operator creates a GitHub issue for the promotion, creates a -branch from the agreed commit in the `develop` branch, pushes the branch to -GitHub, and creates a promotion PR. The promotion PR contains a `promotion -checklist`_ of tasks for the operator to complete to ensure the PR is properly -set up and ready for review. The operator requests a review from the system -administrator, and after approval the PR is assigned back to the operator. - -.. _promotion checklist: https://github.com/DataBiosphere/azul/blob/develop/.github/PULL_REQUEST_TEMPLATE/promotion.md - -At this time the operator announces the promotion via Slack. The promotion -branch is merged into the `prod` branch, then the updated `prod` branch is -pushed to GitHub, followed by `GitLab prod`_ and `GitLab anvilprod`_. On GitLab, -the `prod` branch is run through a CI/CD pipeline to build, test, deploy to the -`prod` deployment, and run integration tests. A reindex is performed on the -`prod` deployment if the promotion PR includes an update to the set of snapshots -indexed by the deployment or changes to the indexer. The operator also performs -all accumulated upgrading instructions from the changes included in the -promotion. When the operator finishes with the updates, the promoted GitHub -issues are marked as merged, and the promotion PR checklist is completed with -the operator unassigning themself from the promotion PR. - -.. _GitLab prod: https://gitlab.azul.data.humancellatlas.org/ucsc/azul -.. _GitLab anvilprod: https://gitlab.explore.anvilproject.org - -As a final step in the process, a meeting is held once a week for developers to -demonstrate to the team the changes they’ve implemented. Following the demo -expectations provided by the system administrator at the time of approval, a -developer demonstrates the resolution of the GitHub issue to the team, and if -successful the issue is then closed. Issues marked “no demo” are also closed at -this time. In the event that a demonstration shows that the issue has not been -successfully resolved, the original issue will be put back in the developer’s -sprint for additional work, or a new follow-up issue will be created. - - -Hotfixes and backports -====================== - -An exception to the procedure of change management and deployment detailed above -is the case of a `hotfix`_. A hotfix is a change made directly to, or that is -merged into, the `prod` branch without first being merged into the `develop` -branch. The system administrator may determine that a hotfix is necessary when a -defect is discovered following an update to the production environment and there -is need for urgent remediation.
Using the checklist included in the `hotfix -PR`_, the change is created, reviewed, and deployed to the production -environment. After a hotfix has been deployed, a `backport PR`_ is created to -backport the change from the `prod` branch to `develop`. - -.. _hotfix: https://github.com/DataBiosphere/azul/blob/develop/CONTRIBUTING.rst#hotfixes -.. _hotfix PR: https://github.com/DataBiosphere/azul/blob/develop/.github/PULL_REQUEST_TEMPLATE/hotfix.md -.. _backport PR: https://github.com/DataBiosphere/azul/blob/develop/.github/PULL_REQUEST_TEMPLATE/backport.md - - -GitLab updates -============== - -The GitLab instances used by the system for CI/CD are self-managed, created from -`GitLab Docker images`_, and are routinely updated by the operator as security -release updates and new versions of GitLab become available. When an update to -GitLab is available, the operator reviews the list of changes in the update with -the system administrator. If the update is approved, the operator will first -create a backup of the storage volumes attached to the `dev` and `anvildev` -GitLab instances. The operator then creates a feature branch to update the -version of the `GitLab Docker image`_ and/or `GitLab runner image`_ used by the -system, and deploys this change to the `dev` and `anvildev` deployments. Once -the new GitLab instances have been created and are active, the same change is -deployed to update the GitLab instances used by the production (`prod` and -`anvilprod`) deployments. Once GitLab has been updated on all deployments, a PR -is created from the feature branch, and the PR checklist is followed to get the -PR reviewed, approved, and merged. - -.. _GitLab Docker images: https://docs.gitlab.com/ee/install/docker.html -.. _GitLab Docker image: https://hub.docker.com/r/gitlab/gitlab-ce/tags -.. _GitLab runner image: https://hub.docker.com/r/gitlab/gitlab-runner/tags diff --git a/docs/compliance/cis/CIS_Amazon_Linux_2_Benchmark_v1_0_0.pdf b/docs/compliance/cis/CIS_Amazon_Linux_2_Benchmark_v1_0_0.pdf deleted file mode 100644 index 586474af23..0000000000 Binary files a/docs/compliance/cis/CIS_Amazon_Linux_2_Benchmark_v1_0_0.pdf and /dev/null differ diff --git a/docs/compliance/cis/CIS_Amazon_Linux_2_Benchmark_v2.0.0.pdf b/docs/compliance/cis/CIS_Amazon_Linux_2_Benchmark_v2.0.0.pdf deleted file mode 100644 index 68fc3c93ef..0000000000 Binary files a/docs/compliance/cis/CIS_Amazon_Linux_2_Benchmark_v2.0.0.pdf and /dev/null differ diff --git a/docs/compliance/cis/license.txt b/docs/compliance/cis/license.txt deleted file mode 100644 index dff84903ae..0000000000 --- a/docs/compliance/cis/license.txt +++ /dev/null @@ -1,429 +0,0 @@ -https://www.cisecurity.org/benchmark/amazon_linux - -Attribution-ShareAlike 4.0 International - -======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. 
- -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. - - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. - Although not required by our licenses, you are encouraged to - respect those requests where reasonable. More considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution-ShareAlike 4.0 International Public -License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution-ShareAlike 4.0 International Public License ("Public -License"). To the extent this Public License may be interpreted as a -contract, You are granted the Licensed Rights in consideration of Your -acceptance of these terms and conditions, and the Licensor grants You -such rights in consideration of benefits the Licensor receives from -making the Licensed Material available under these terms and -conditions. - - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. 
Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. BY-SA Compatible License means a license listed at - creativecommons.org/compatiblelicenses, approved by Creative - Commons as essentially the equivalent of this Public License. - - d. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - e. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - f. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - g. License Elements means the license attributes listed in the name - of a Creative Commons Public License. The License Elements of this - Public License are Attribution and ShareAlike. - - h. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - i. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - j. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - k. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - l. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - m. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part; and - - b. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. 
The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. Additional offer from the Licensor -- Adapted Material. - Every recipient of Adapted Material from You - automatically receives an offer from the Licensor to - exercise the Licensed Rights in the Adapted Material - under the conditions of the Adapter's License You apply. - - c. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties. - - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. 
indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - b. ShareAlike. - - In addition to the conditions in Section 3(a), if You Share - Adapted Material You produce, the following conditions also apply. - - 1. The Adapter's License You apply must be a Creative Commons - license with the same License Elements, this version or - later, or a BY-SA Compatible License. - - 2. You must include the text of, or the URI or hyperlink to, the - Adapter's License You apply. You may satisfy this condition - in any reasonable manner based on the medium, means, and - context in which You Share Adapted Material. - - 3. You may not offer or impose any additional or different terms - or conditions on, or apply any Effective Technological - Measures to, Adapted Material that restrict exercise of the - rights granted under the Adapter's License You apply. - - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database; - - b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material, - including for purposes of Section 3(b); and - - c. You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - - -Section 7 -- Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - - -Section 8 -- Interpretation. - - a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - - -======================================================================= - -Creative Commons is not a party to its public -licenses. 
Notwithstanding, Creative Commons may elect to apply one of -its public licenses to material it publishes and in those instances -will be considered the “Licensor.” The text of the Creative Commons -public licenses is dedicated to the public domain under the CC0 Public -Domain Dedication. Except for the limited purpose of indicating that -material is shared under a Creative Commons public license or as -otherwise permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the -public licenses. - -Creative Commons may be contacted at creativecommons.org. diff --git a/docs/dependencies.svg b/docs/dependencies.svg deleted file mode 100644 index 9c1484a9e8..0000000000 --- a/docs/dependencies.svg +++ /dev/null @@ -1,185 +0,0 @@ - -TBDBDRTR diff --git a/docs/gitlab.monopic b/docs/gitlab.monopic deleted file mode 100644 index e8f4331a8d..0000000000 Binary files a/docs/gitlab.monopic and /dev/null differ diff --git a/docs/licenses/aws-signing-proxy.txt b/docs/licenses/aws-signing-proxy.txt deleted file mode 100644 index d97f5e6375..0000000000 --- a/docs/licenses/aws-signing-proxy.txt +++ /dev/null @@ -1,5 +0,0 @@ -https://github.com/cllunsford/aws-signing-proxy/blob/master/README.md - -License -MIT 2018 (c) Chris Lunsford - diff --git a/docs/licenses/aws_cli.txt b/docs/licenses/aws_cli.txt deleted file mode 100644 index 8cc43f554d..0000000000 --- a/docs/licenses/aws_cli.txt +++ /dev/null @@ -1,14 +0,0 @@ -https://github.com/aws/aws-cli/blob/develop/LICENSE.txt - -Copyright 2012-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"). You -may not use this file except in compliance with the License. A copy of -the License is located at - - http://aws.amazon.com/apache2.0/ - -or in the "license" file accompanying this file. This file is -distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -ANY KIND, either express or implied. See the License for the specific -language governing permissions and limitations under the License. diff --git a/docs/licenses/bigquery.txt b/docs/licenses/bigquery.txt deleted file mode 100644 index 9d642030e9..0000000000 --- a/docs/licenses/bigquery.txt +++ /dev/null @@ -1,121 +0,0 @@ -https://cloud.google.com/terms/service-terms - -Service Specific Terms - -These Service Specific Terms are incorporated into the agreement under which Google has agreed to provide Google Cloud Platform (as described at https://cloud.google.com/terms/services) to Customer (the “Agreement”). If the Agreement authorizes the resale or supply of Google Cloud Platform under a Google Cloud partner or reseller program, then all references to Customer in the Service Specific Terms mean Partner or Reseller (as applicable), and all references to Customer Data in the Service Specific Terms mean Partner Data. Capitalized terms used but not defined in the Service Specific Terms have the meaning given to them in the Agreement. - -General Service Terms - -1. Data Location. 
Customer may configure the Services listed at https://cloud.google.com/terms/data-residency to store Customer Data in a specific Region or Multi-Region as detailed in the Cloud Locations Page, and Google will store that Customer Data at rest only in the selected Region or Multi-Region. The Services do not limit the locations from which Customer or Customer End Users may access Customer Data or to which they may move Customer Data. For clarity, Customer Data does not include resource identifiers, attributes, or other data labels. - -2. Operations of Communications Services. Notwithstanding any telecommunications restrictions in the Agreement, Customer may use the Services for hosting capacity in connection with Customer’s provision of telecommunications services to Customer End Users if (a) Customer obtains, maintains, and complies with all necessary regulatory licenses, registrations or other applicable requirements relating to such telecommunications services, and (b) Customer does not use or resell the Services to provide telecommunications connectivity, including for virtual private network services, network transport, or voice or data transmission. - -3. General Software Terms. The following terms apply to all Software: - - a. License. Google grants Customer a royalty-free (unless otherwise stated by Google), non-exclusive, non-sublicensable, non-transferable license during the Term to reproduce and use the Software ordered by Customer on systems owned, operated, or managed by or on behalf of Customer in accordance with (i) the Agreement, and (ii) if applicable, the Scope of Use. Customer may authorize its and its Affiliates' employees, agents, and subcontractors (collectively, “Software Users”) to use the Software in accordance with this section (License), so long as Customer remains responsible. Customer may make a reasonable number of copies of the Software for back-up and archival purposes. For clarity, Software does not constitute Services. - - b. Documentation. Google may provide Documentation describing the appropriate operation of the Software, including a description of how Software is properly used, and whether and how the Software collects and processes data. Customer will comply with any restrictions in the Documentation regarding Software use. - - c. Compliance With Scope of Use. Within 30 days of Google’s reasonable written request, Customer will provide a sufficiently detailed written report describing its usage in accordance with the applicable Scope of Use of each Software product used by Customer and its Software Users during the requested period. If requested, Customer will provide reasonable assistance and access to information to verify the accuracy of Customer’s Software usage report(s). - - d. Other Warranties and Compliance. Each party represents and warrants that it will comply with all laws and regulations applicable to its provision or use of the Software, as applicable. Customer will: (i) ensure that Customer and its Software Users' use of the Software complies with the Agreement and the restrictions in the Agreement applying to Customer's use of the Services; (ii) use commercially reasonable efforts to prevent and terminate any unauthorized access to or use of the Software; and (iii) promptly notify Google of any unauthorized access to or use of the Software of which Customer becomes aware. 
If the Software contains open source or third-party components, those components may be subject to separate license agreements, which Google will make available to Customer. Customer is solely responsible for complying with the terms of any third-party sources from which Customer elects to migrate its workloads onto the Services, and represents and warrants that such third-party sources permit the use of Software to migrate applications away from such sources. If the Agreement terminates or expires, then Customer will stop using all Software and delete it from Customer's systems. - -4. Premium Software Terms. The following terms apply only to Premium Software: - - a. Introduction. Google makes certain Software available under the Agreement described as “Premium Software” at https://cloud.google.com/terms/services(“Premium Software”). Customer will pay applicable Fees for any Premium Software it obtains as described at the Fees URL. Premium Software is Google’s Confidential Information. - - b. Software Warranty. Google warrants to Customer that for one year from its delivery, Premium Software will perform in material conformance with the applicable Documentation. This warranty will not apply if (i) Customer does not notify Google of the non-conformity within 30 days after Customer first discovers it, (ii) Customer modifies Premium Software or uses it in violation of the Agreement, or (iii) the non-conformity is caused by any third-party hardware, software, services, or other offerings or materials, in each case not provided by Google. - - If Google breaches this warranty, then Google will, in its discretion, repair or replace the impacted Premium Software at no additional charge. If Google does not believe that repairing or replacing would be commercially reasonable, then Google will notify Customer and (A) Customer will immediately cease use of the impacted Premium Software and (B) Google will refund or credit any prepaid amounts for the impacted Premium Software and Customer will be relieved of any then-current commitment to pay for future use of the impacted Premium Software. Without limiting the parties’ termination rights, this section (Software Warranty) states Customer’s sole remedy for Google’s breach of the warranty in this section (Software Warranty). - - c. Software Indemnification. Google’s indemnity obligations under the Agreement with respect to allegations of infringement of third-party Intellectual Property Rights apply to Premium Software, and Customer’s indemnity obligations under the Agreement with respect to Customer’s use of the Services apply to Customer’s use of Premium Software. In addition to any other indemnity exclusions in the Agreement, Google’s indemnity obligations will not apply to the extent the underlying allegation arises from modifications to Premium Software not made by Google or use of versions of Premium Software that are no longer supported by Google. - - d. Technical Support. Unless otherwise specified by Google, Google will make TSS available for Premium Software for an additional charge, in accordance with the TSS Guidelines. - - e. Compliance. Premium Software may transmit to Google metering information reasonably necessary to verify that use of the Premium Software complies with the Scope of Use, as described in the applicable Documentation. Customer will not disable or interfere with the transmission of such metering information. - - f. Updates and Maintenance. 
During the Term, Google will make available to Customer copies of all current versions, updates, and upgrades of Premium Software, promptly upon general availability, as described in the Documentation. Unless otherwise stated in the Documentation for the applicable component of Premium Software, Google will maintain the current release of Premium Software and the two versions immediately preceding the current release, including by providing reasonable bug fixes and security patches. Maintenance for any Premium Software may be discontinued with one year’s notice from Google, except Google may eliminate maintenance for a version and require upgrading to a maintained version to address a material security risk or when reasonably necessary to avoid an infringement claim or comply with applicable law. - -5. Pre-GA Offerings Terms. Google may make available to Customer pre-general availability Google Cloud Platform features, services or software that are either not yet listed at https://cloud.google.com/terms/services or identified as “Early Access,” “Alpha,” “Beta,” “Preview,” “Experimental,” or a similar designation in related documentation or materials (collectively, “Pre-GA Offerings”). While Pre-GA Offerings are not Services or Software, Customer’s use of Pre-GA Offerings is subject to the terms of the Agreement applicable to Services (or Software, if applicable), as amended by this Section 5. - -Customer may provide feedback and suggestions about the Pre-GA Offerings to Google, and Google and its Affiliates may use any feedback or suggestions provided without restriction and without obligation to Customer. - -PRE-GA OFFERINGS ARE PROVIDED “AS IS” WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES OR REPRESENTATIONS OF ANY KIND. Pre-GA Offerings (a) may be changed, suspended or discontinued at any time without prior notice to Customer and (b) are not covered by any SLA or Google indemnity. Except as otherwise expressly indicated in a written notice or the documentation for a given Pre-GA Offering, (i) Pre-GA Offerings may not be covered by TSS, (ii) the Cloud Data Processing Addendum does not apply to Pre-GA Offerings and Customer should not use Pre-GA Offerings to process personal data or other data subject to legal or regulatory compliance requirements, and (iii) Google’s data location commitments set out in these Service Specific Terms will not apply to Pre-GA Offerings. With respect to Pre-GA Offerings, to the maximum extent permitted by applicable law, neither Google nor its suppliers will be liable for any amounts in excess of the lesser of (A) the limitation on the amount of liability stated in the Agreement or (B) $25,000. Nothing in the preceding sentence will affect the remaining terms of the Agreement relating to liability (including any specific exclusions from any limitation of liability). Customer's access to and use of any Pre-GA Offering is subject to any applicable Scope of Use. Either party may terminate Customer's use of a Pre-GA Offering at any time with written notice to the other party. Certain Pre-GA Offerings may be subject to additional terms stated below. - -6. Google-Managed Multi-Cloud. The then-current services described as “Google-Managed Multi-Cloud Services” at https://cloud.google.com/terms/services ("Google-Managed MCS") are Google services, products and features that are hosted on the infrastructure of a third party cloud provider (“Multi-Cloud Service Third-Party Provider”). 
While the Google-Managed MCS are not Services or Software, Customer’s use of the Google-Managed MCS is subject to the terms of the Agreement applicable to Services (or Software, if applicable), as amended by this Section 6. In addition to the terms of the Agreement and notwithstanding anything to the contrary in the “Conflicting Terms” section of the Agreement, Customer’s use of the Google-Managed MCS is subject to the following terms: - -a. Admin Console. The Google-Managed MCS may not be available through the Admin Console. - -b. Multi-Cloud Service Third-Party Provider Relationship. - -i. To make use of the Google-Managed MCS, Customer must maintain an independent account and billing relationship with the applicable Multi-Cloud Service Third-Party Provider. Customer is responsible for entering into and complying with an appropriate agreement with the applicable Multi-Cloud Service Third-Party Provider governing Customer’s use of the Multi-Cloud Service Third-Party Provider’s services and the Multi-Cloud Service Third-Party Provider’s processing of personal data on behalf of Customer. The Agreement does not obligate Google or the Multi-Cloud Service Third-Party Provider to provide the Multi-Cloud Service Third-Party Provider’s services that are necessary for the Customer to use the Google-Managed MCS. - -ii. If the Multi-Cloud Service Third-Party Provider makes a change to its services or terms, and Google reasonably concludes that its provision of the Google-Managed MCS is no longer commercially feasible as a result of the change, Google may immediately Suspend all or part of Customer's use of the impacted Google-Managed MCS, or make any other discontinuance or backwards-incompatible change necessary to continue to provide the Google-Managed MCS. Google will lift any such Suspension once the circumstances giving rise to the Suspension have been resolved. To the extent Google may Suspend or modify the Google-Managed MCS as set forth in this Section, the Google-Managed MCS are not subject to the sections of the Agreement covering discontinuance and backwards-incompatible changes. - -c. Data Processing. Processing of data by the Google-Managed MCS is subject to the terms of the Cloud Data Processing Addendum, as supplemented and amended by the Google-Managed Multi-Cloud Services Cloud Data Processing Addendum. "Google-Managed Multi-Cloud Services Cloud Data Processing Addendum" means the terms stated at https://cloud.google.com/terms/mcs-data-processing-terms. - -d. Limitation of Liability. Notwithstanding anything to the contrary in the Agreement (except subject to any unlimited liabilities expressly stated in the Agreement), to the maximum extent permitted by law, each party’s total aggregate Liability for damages arising out of or relating to the Google-Managed MCS is limited to the greater of (1) the Fees Customer paid for the Google-Managed MCS during the 12-month period before the event giving rise to liability and (2) $25,000. - -e. Disclaimers. 
Notwithstanding anything to the contrary in the Agreement, the Google-Managed MCS (i) are not covered by any SLA covering Google Cloud Platform Services, unless specifically identified under the terms of the SLA, (ii) are not subject to any obligations for Google to provide termination or transition assistance or other technical assistance after Suspension or termination, (iii) are not subject to any business continuity or disaster recovery commitments, and (iv) are not Audited Services, unless specifically identified at the Audited Services URL as listed in Section 2.1 (Definitions) of the CDPA. - -f. Survival. The following subsections of these Google-Managed Multi-Cloud Service Specific Terms will survive expiration or termination of the Agreement: d (Limitation of Liability); e (Disclaimers); and f (Survival). - -7. Benchmarking. Customer may conduct benchmark tests of the Services (each a "Test"). Customer may only publicly disclose the results of such Tests if it (a) obtains Google's prior written consent, (b) provides Google all necessary information to replicate the Tests, and (c) allows Google to conduct benchmark tests of Customer's publicly available products or services and publicly disclose the results of such tests. Notwithstanding the foregoing, Customer may not do either of the following on behalf of a hyperscale public cloud provider without Google's prior written consent: (i) conduct (directly or through a third party) any Test of the Services or (ii) disclose the results of any such Test. - -8. Trials. Certain Services may be made available to Customer on a trial basis. The parameters of each trial, including any Scope of Use, may be presented to Customer either through the Fees URL, Admin Console, Documentation, email, or as otherwise communicated by Google. Use of a trial indicates Customer’s acceptance of any such parameters. - -9. User Experience Research. If Customer enrolls in the Google Cloud User Experience Research Program for Google Cloud Platform, Customer’s participation will be subject to the Google Cloud User Experience Research Panel Addendum available at https://cloud.google.com/terms/user-experience-research or a successor URL. - -10. PGSSI-S. Customer will comply with France's General Security Policy for Health Information Systems (PGSSI-S) to the extent applicable. - -11. Additional Definitions. - -“Cloud Locations Page” means https://cloud.google.com/about/locations/. - -“Documentation” means the then-current Google documentation made available by Google to its customers for use with the Services at https://cloud.google.com/docs/. - -“Fees URL” means https://cloud.google.com/skus. - -“Multi-Region” means a defined set of Regions. - -“Region” means a region from which a particular Service is offered, as identified at the Cloud Locations Page. - -“Scope of Use” means any limits on installation or usage of Services or Software described at the Fees URL, Admin Console, order form, or otherwise presented by Google. - -Service Terms - -5. BigQuery. - -a. ODBC/JDBC Drivers. The ODBC and JDBC drivers for BigQuery (as described here) are “Software” as defined in the Agreement and any use of them is subject to the “General Software Terms” above. These drivers may only be used with BigQuery and may not be used with any other product or service. - -b. Analytics Hub (PREVIEW) - -(i) Introduction. Analytics Hub is a feature of BigQuery that helps users find and share sets of Customer Data (“Datasets”).
Datasets are organized into shared repositories (“Exchanges”) with each Dataset’s listings containing (as applicable) a description, documentation, branding, metadata or similar materials (“Listing Materials”). - -(ii) Roles. Customer can serve different roles in Analytics Hub: - - A. “Publishers” create and submit Datasets for listing within Exchanges. - - B. “Subscribers” request access to Datasets listed in Exchanges for their own use in BigQuery. - - C. “Exchange Administrators” (1) create and administer Exchanges, (2) add or remove Listing Materials in Exchanges, (3) control visibility of Exchanges and Listing Materials, and (4) on behalf of the relevant Publisher, enable or reject Subscriber requests to access Datasets. - -(iii) Publishers. Publishers can serve as their own Exchange Administrators or submit Datasets to Exchanges operated by third-party Exchange Administrators. In the latter case, the Publisher submits its Dataset to the Exchange Administrator and must follow any enrollment steps specified by the Exchange Administrator for its Exchange. - -(iv) Exchange Administrators. When acting as an Exchange Administrator, Customer must: - - A. Secure and maintain all necessary rights, consents and permissions (including from any third-party Publishers) to list, share or take other action with respect to the Datasets or Listing Materials; and - - B. Handle takedown requests, data subject requests, notices of infringement, and any other notices or requests it receives regarding its Exchange or related Listing Materials or Datasets. - - For clarity, Listing Materials are considered Customer Data of the applicable Exchange Administrator. - -(v) Separate Relationships. - - A. Google is not responsible for and will have no liability to Customer in relation to any terms or relationships between Customer and any third party acting as a Publisher, Subscriber, or Exchange Administrator. If Customer offers any commitments to any such third party beyond the commitments that Google has made to Customer in the Agreement (including in these Service Specific Terms), Google will not be liable for such commitments. - - B. Publishers and Exchange Administrators must ensure that any terms they have with Subscribers do not contradict these Service Specific Terms or the Agreement. - - C. If a Publisher or Exchange Administrator charges fees for access to Datasets, it is solely responsible for (1) collecting the fees independently of Google and Analytics Hub and (2) any related refunds or liabilities to Subscribers. - - D. Google does not guarantee continued availability of any Datasets, and gives no warranty, indemnification or other obligation, and accepts no liability or responsibility, with respect to Datasets or their use.
diff --git a/docs/licenses/cerebro.txt b/docs/licenses/cerebro.txt deleted file mode 100644 index e2904bbac9..0000000000 --- a/docs/licenses/cerebro.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/lmenezes/cerebro/blob/main/LICENSE - -MIT License - -Copyright (c) 2017 Leonardo Menezes - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/curl.txt b/docs/licenses/curl.txt deleted file mode 100644 index 3ef779c4eb..0000000000 --- a/docs/licenses/curl.txt +++ /dev/null @@ -1,13 +0,0 @@ -https://curl.se/docs/copyright.html - -COPYRIGHT AND PERMISSION NOTICE - -Copyright (c) 1996 - 2022, Daniel Stenberg, daniel@haxx.se, and many contributors, see the THANKS file. - -All rights reserved. - -Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization of the copyright holder. diff --git a/docs/licenses/docker.txt b/docs/licenses/docker.txt deleted file mode 100644 index 18f0c0b213..0000000000 --- a/docs/licenses/docker.txt +++ /dev/null @@ -1,51 +0,0 @@ -https://www.docker.com/legal/docker-software-end-user-license-agreement/ - -THIS DOCKER SOFTWARE END USER LICENSE AGREEMENT (“AGREEMENT”) IS BY AND BETWEEN DOCKER, INC., LOCATED AT 318 CAMBRIDGE AVENUE, PALO ALTO, CALIFORNIA 94306 USA (“DOCKER”) AND THE INDIVIDUAL OR LEGAL ENTITY WHO IS USING THE APPLICABLE SOFTWARE MADE AVAILABLE BY DOCKER (“CUSTOMER”) AND GOVERNS ALL USE BY CUSTOMER OF SUCH SOFTWARE. - -BY DOWNLOADING OR USING THE SOFTWARE YOU EXPRESSLY ACCEPT AND AGREE TO THE TERMS OF THIS AGREEMENT. 
IF YOU ARE AN INDIVIDUAL AGREEING TO THE TERMS OF THIS AGREEMENT ON BEHALF OF AN ENTITY, SUCH AS YOUR EMPLOYER, YOU REPRESENT THAT YOU HAVE THE LEGAL AUTHORITY TO BIND THAT ENTITY AND “YOU” AND “YOUR” SHALL REFER HEREIN TO SUCH ENTITY. IF YOU DO NOT HAVE SUCH AUTHORITY, OR IF YOU DO NOT AGREE WITH ALL THE TERMS OF THIS AGREEMENT, YOU MUST NOT DOWNLOAD, INSTALL, DEPLOY, OR USE THE SOFTWARE. - -1. DEFINITIONS - -The following capitalized terms shall have the meanings set forth below: - -1.1 “Licensed Software” means the Docker software licensed to you pursuant to the terms of this Agreement, excluding any Open Source Software contained therein. - -1.2 “Open Source Software” means Docker or third party software that is distributed or otherwise made available as “free software”, “open source software” or under a similar licensing or distribution model. - -2. LICENSE - -2.1 Licensed Software. Subject to your compliance with the terms and conditions of this Agreement, Docker hereby grants You a limited, non-exclusive, non-transferable, non-sub-licensable license to install, copy and use the Licensed Software solely for your internal use. - -2.2 Open Source Software. If applicable, Open Source Software is distributed or made available under the terms of the open source license agreements referenced in the applicable distribution or the applicable help, notices, about or source files. Copyrights and other proprietary rights to the Open Source Software are held by the copyright holders identified in the applicable distribution or the applicable help, notices, about or source files. - -3. RESTRICTED ACTIVITIES - -You shall not, and shall not encourage any third party to: (a) modify, adapt, alter, translate, or create derivative works of the Licensed Software; (b) reverse-engineer, decompile, disassemble, or attempt to derive the source code for the Licensed Software, in whole or in part, except to the extent that such activities are permitted under applicable law; (c) distribute, license, sublicense, lease, rent, loan, or otherwise transfer the Licensed Software to any third party; (d) remove, alter, or obscure in any way the proprietary rights notices (including copyright, patent, and trademark notices and symbols) of Docker or its suppliers contained on or within any copies of the Licensed Software; (e) use the Licensed Software for the purpose of creating a product or service competitive with the Licensed Software; (f) use the Licensed Software for any time-sharing, outsourcing, service bureau, hosting, application service provider or like purposes; (g) disclose the results of any benchmark tests on the Licensed Software without Docker’s prior written consent; or (h) use the Licensed Software other than as described in the documentation provided therewith, or for any unlawful purpose. - -4. OWNERSHIP - -Docker and its licensors own and retain all right, title, and interest, including all intellectual property rights, in and to the Licensed Software, including any improvements, modifications, and enhancements to it. Except for the rights expressly granted in this Agreement, You shall acquire no other rights, express or implied, in or to the Licensed Software, and all rights not expressly provided to you hereunder are reserved by Docker and its licensors. All the copies of the Licensed Software provided or made available hereunder are licensed, not sold. - -5. 
FEEDBACK - -Feedback means any comments or other feedback You may provide to Docker, at your sole discretion, concerning the functionality and performance of the Licensed Software, including identification of potential errors and improvements. By submitting any Feedback, You hereby assign to Docker all right, title, and interest in and to the Feedback, if any. - -6. NO WARRANTIES - -YOU EXPRESSLY UNDERSTAND AND AGREE THAT ALL USE OF THE LICENSED SOFTWARE IS AT YOUR SOLE RISK AND THAT THE LICENSED SOFTWARE IS PROVIDED “AS IS” AND “AS AVAILABLE.” DOCKER, ITS SUBSIDIARIES AND AFFILIATES, AND ITS LICENSORS MAKE NO EXPRESS WARRANTIES AND DISCLAIM ALL IMPLIED WARRANTIES REGARDING THE LICENSED SOFTWARE, INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT, TOGETHER WITH ANY AND ALL WARRANTIES ARISING FROM COURSE OF DEALING OR USAGE IN TRADE. NO ADVICE OR INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED FROM DOCKER OR ELSEWHERE SHALL CREATE ANY WARRANTY NOT EXPRESSLY STATED IN THIS AGREEMENT. WITHOUT LIMITING THE GENERALITY OF THE FOREGOING, DOCKER, ITS SUBSIDIARIES AND AFFILIATES, AND ITS LICENSORS DO NOT REPRESENT OR WARRANT TO YOU THAT: (A) YOUR USE OF THE LICENSED SOFTWARE WILL MEET YOUR REQUIREMENTS, OR (B) YOUR USE OF THE LICENSED SOFTWARE WILL BE UNINTERRUPTED, TIMELY, SECURE OR FREE FROM ERROR. NOTWITHSTANDING THE FOREGOING, NOTHING HEREIN SHALL EXCLUDE OR LIMIT DOCKER’S WARRANTY OR LIABILITY FOR LOSSES WHICH MAY NOT BE LAWFULLY EXCLUDED OR LIMITED BY APPLICABLE LAW. YOU UNDERSTAND AND ACKNOWLEDGE THAT THE LICENSED SOFTWARE IS NOT DESIGNED, INTENDED OR WARRANTED FOR USE IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE CONTROLS, INCLUDING WITHOUT LIMITATION, OPERATION OF NUCLEAR FACILITIES, AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, AND LIFE SUPPORT OR WEAPONS SYSTEMS. - -7. INDEMNIFICATION BY YOU - -You agree to hold harmless and indemnify Docker and its subsidiaries, affiliates, officers, agents, employees, advertisers, licensors, suppliers or partners from and against any third party claim arising from or in any way related to your breach of this Agreement, use of the Licensed Software, or violation of applicable laws, rules or regulations in connection with the Licensed Software, including any liability or expense arising from all claims, losses, damages (actual and consequential), suits, judgments, litigation costs and attorneys’ fees, of every kind and nature. - -8. LIMITATION OF LIABILITY - -YOU EXPRESSLY UNDERSTAND AND AGREE THAT DOCKER, ITS SUBSIDIARIES AND AFFILIATES, AND ITS LICENSORS SHALL NOT BE LIABLE TO YOU FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, CONSEQUENTIAL OR EXEMPLARY DAMAGES INCURRED BY YOU, HOWEVER CAUSED AND UNDER ANY THEORY OF LIABILITY, INCLUDING, BUT NOT LIMITED TO, ANY LOSS OF PROFIT (WHETHER INCURRED DIRECTLY OR INDIRECTLY), ANY LOSS OF GOODWILL OR BUSINESS REPUTATION, ANY LOSS OF DATA SUFFERED, COST OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR OTHER INTANGIBLE LOSS. THE FOREGOING LIMITATIONS ON DOCKER’S LIABILITY SHALL APPLY WHETHER OR NOT DOCKER HAS BEEN ADVISED OF OR SHOULD HAVE BEEN AWARE OF THE POSSIBILITY OF ANY SUCH LOSSES ARISING. NOTWITHSTANDING THE FOREGOING, NOTHING HEREIN SHALL EXCLUDE OR LIMIT DOCKER’S LIABILITY FOR LOSSES WHICH MAY NOT BE LAWFULLY EXCLUDED OR LIMITED BY APPLICABLE LAW. THE TOTAL LIABILITY OF DOCKER ARISING OUT OF OR RELATED TO THIS AGREEMENT WILL NOT EXCEED USD $100. - -9.
EXPORT RESTRICTIONS - -You understand that the software provided under this Agreement may contain encryption technology and other software programs that may require an export license from the U.S. State Department and that export or re-export of the software to certain entities (such as a foreign government and its subdivisions) and certain countries is prohibited. You agree that you will comply with all applicable export and import control laws and regulations of the United States and the foreign jurisdiction in which the software is used and, in particular, You will not export or re-export the software without all required United States and foreign government licenses. You will defend, indemnify, and hold harmless Docker and its suppliers and licensors from and against any violation of such laws or regulations by you or any of your agents, officers, directors or employees. - -10. MISCELLANEOUS - -The Licensed Software and any other software covered under this Agreement are “commercial items” as that term is defined at 48 C.F.R. 2.101; consisting of “commercial computer software” and “commercial computer software documentation” as such terms are used in 48 C.F.R. 12.212. Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4, all U.S. Government end users acquire the Licensed Software and any other software and documentation covered under this Agreement with only those rights set forth herein. This Agreement will be governed by the laws of the State of California without reference to conflict of law principles. Each party agrees to submit to the exclusive jurisdiction of the courts located within the county of Santa Clara, California to resolve any legal matter arising from this Agreement. You may not assign any of your rights or obligations under this Agreement, whether by operation of law or otherwise, without the prior written consent of Docker. Notwithstanding the foregoing, Docker may assign the entirety of its rights and obligations under this Agreement without your consent. The application of the UN Convention on Contracts for the International Sale of Goods to this Agreement is disclaimed in its entirety. This Agreement constitutes the entire agreement between You and Docker governing your use of the Licensed Software and supersedes any prior agreements between You and Docker, including but not limited to, any prior versions of this Agreement. The failure of Docker to enforce its rights under this Agreement at any time for any period shall not be construed as a waiver of such rights. If any provision of this Agreement is held invalid or unenforceable, the remainder of this Agreement will continue in full force and effect and the invalid or unenforceable provision shall be reformed to the extent necessary to make it valid and enforceable. diff --git a/docs/licenses/elasticsearch.txt b/docs/licenses/elasticsearch.txt deleted file mode 100644 index 0caa9c0c3d..0000000000 --- a/docs/licenses/elasticsearch.txt +++ /dev/null @@ -1,57 +0,0 @@ -https://www.elastic.co/licensing/elastic-license - -Elastic License 2.0 (ELv2) - -Elastic License - -Acceptance - -By using the software, you agree to all of the terms and conditions below. - -Copyright License - -The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below.
- -Limitations - -You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software. - -You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key. - -You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law. - -Patents - -The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company. - -Notices - -You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms. - -If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software. - -No Other Rights - -These terms do not imply any licenses other than those expressly granted in these terms. - -Termination - -If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently. - -No Liability - -As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim. - -Definitions - -The licensor is the entity offering these terms, and the software is the software the licensor makes available under these terms, including any portion of it. - -you refers to the individual or entity agreeing to these terms. - -your company is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. control means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect. - -your licenses are all the licenses granted to you for the software under these terms. - -use means anything you do with the software requiring one of your licenses. - -trademark means trademarks, service marks, and similar rights. 
diff --git a/docs/licenses/git.txt b/docs/licenses/git.txt deleted file mode 100644 index b0f26755c2..0000000000 --- a/docs/licenses/git.txt +++ /dev/null @@ -1,7 +0,0 @@ -https://git-scm.com/about/free-and-open-source - -Free and Open Source - -Git is released under the GNU General Public License version 2.0, which is an open source license. The Git project chose to use GPLv2 to guarantee your freedom to share and change free software---to make sure the software is free for all its users. - -However, we do restrict the use of the term "Git" and the logos to avoid confusion. Please see our trademark policy for details. diff --git a/docs/licenses/github.txt b/docs/licenses/github.txt deleted file mode 100644 index dde225504b..0000000000 --- a/docs/licenses/github.txt +++ /dev/null @@ -1,358 +0,0 @@ -https://github.com/customer-terms/general-terms - -GitHub Customer Agreement - -This Agreement consists of the General Terms, the applicable Product Specific Terms and any additional terms -GitHub presents when an order is placed. The Agreement takes effect when Customer accepts the General -Terms, and the individual who accepts these General Terms represents that they are authorized to enter into -this Agreement on behalf of Customer. - -GitHub General Terms - -These General Terms apply to all of Customer’s orders under this Agreement. Capitalized terms have the -meanings given under Definitions. - -1 License to use GitHub Products - -1.1 License grant. Products are licensed and not sold. Subject to Customer’s compliance with this -Agreement, GitHub grants to Customer a nonexclusive and limited license to install and use the -Products ordered as provided in the applicable Product Specific Terms and this Agreement. The licenses -are only for Customer’s internal business purposes and are non-transferable except as expressly -permitted under this Agreement or applicable law. - -1.2 Duration of licenses. Licenses expire at the end of the applicable Subscription Term unless renewed. - -1.3 Accounts. Customer may assign each Subscription License to one individual End User for use or access -on any number of devices. Customer may not reassign a Subscription License to another End User -within 90 days of the last assignment, except where End User’s relationship with Customer ends or End -User goes on leave. End User accounts may not be shared by individuals. - -1.4 End Users. Customer controls access to and use of the Products by End Users. Customer is responsible -for End Users’ use. - -1.5 Product Specific Terms. Product Specific Terms apply to Products, such as GitHub AE, GitHub Enterprise -Cloud and GitHub Enterprise Server. If there is a conflict between the Product Specific Terms and the -General Terms, the Product Specific Terms apply for that Product. - -1.6 Previews. Previews are provided “AS-IS”, “WITH ALL FAULTS” and “AS AVAILABLE”. GitHub may change -or discontinue Previews at any time without notice. - -1.7 Product changes. GitHub has the right to make changes to the Products if such changes do not -materially lessen the Product’s functionality. GitHub may provide additional terms that apply to -Customer’s use of updates, new features or related software. - -1.8 Affiliates. Customer’s Affiliates may use the Products under this Agreement. Customer is responsible for -its Affiliates’ use, and Customer has the sole right to enforce this Agreement. - -1.9 Compliance with laws. 
Customer’s use of the Products must not violate any applicable laws, including -copyright or trademark laws, export control laws and regulations, including laws and regulations in its -jurisdiction. - -1.10 Reservation of rights. Products are protected by copyright and other intellectual property laws and -international treaties. GitHub reserves all rights not expressly granted in this Agreement, and no rights -are granted or implied by waiver or estoppel. - -1.11 Feedback. Feedback by Customer is voluntary and may be used by GitHub for any purpose without -obligation of any kind. - -1.12 Restrictions. Unless expressly permitted in this Agreement or by law, Customer may not: -(a) reverse engineer, decompile, or disassemble any Product, or try to do so; -(b) run, upgrade or downgrade, or transfer parts of a Product separately at different times or on -different devices; -(c) install, use, or distribute other software or technology in any way that makes -GitHub’s intellectual property or technology subject to any other license terms; -(d) work around technical limitations in a Product or restrictions in Product documentation; or -(e) sell, rent, lease, sublicense, distribute or lend any Products to others, in whole or in part, or host -Products for use by others. - -2 Support - -Support. GitHub will Support a generally available release of a Product for one year from the original -release date or 6 months from the last generally available update of such release, whichever is longer. -GitHub is not responsible for Support if (a) someone other than a GitHub Representative modifies the -Products or (b) Customer uses the Products in a manner unauthorized by the Agreement or Product -documentation. -3 Data Protection - -Personal Data. Customer and GitHub will comply with applicable data protection laws. - -4 Confidentiality - -4.1 Existing NDA. If the parties have entered into a non-disclosure agreement, those terms apply instead of -this confidentiality section. - -4.2 Confidential Information. “Confidential Information” is non-public information in any form that is -marked as “confidential” or that a reasonable person should understand is confidential. This includes, -but is not limited to, Customer Content, the terms of this Agreement and Customer’s account -authentication credentials. -Confidential Information does not include information that: -(a) becomes publicly available without a breach of a confidentiality obligation; -(b) was received lawfully from another source without a confidentiality obligation; -(c) is independently developed; or -(d) is Feedback. - -4.3 Protection of Confidential Information. Each party will take reasonable steps to protect the other’s -Confidential Information. A party will only use the other party’s Confidential Information as part of the -parties’ business relationship. Neither party will disclose Confidential Information to third parties. A -party may only share Confidential Information with a party’s Representatives on a need-to-know basis, -under nondisclosure obligations at least as protective as this Agreement. Each party remains -responsible for the use of Confidential Information by its Representatives. A party must promptly notify -the other party if it discovers any unauthorized use or disclosure. - -4.4 Disclosure required by law. A party may disclose the other’s Confidential Information if required by law, -but only after it notifies the other party (if legally permissible) so that the other party can seek a -protective order. 
- -4.5 Residual information. Neither party is required to restrict its Representatives in other work assignments -if they have had access to Confidential Information. Each party agrees that the use of information -retained in Representatives’ unaided memories in the development or deployment of the parties’ -respective products or services does not create liability under this Agreement or trade secret law. - -4.6 Duration of confidentiality obligation. These confidentiality obligations apply (1) for Customer Content, -until it is deleted from the Online Services; and (2) for all other Confidential Information, for a period of -five years after a party receives the Confidential Information. - -5 Warranties - -5.1 Limited warranties and remedies. -(a) Online Services. GitHub warrants that the Online Services will perform in accordance with the -applicable SLA during Customer’s use. Customer’s remedies for breach of this warranty are -described in the SLA. -(b) Software. GitHub warrants that the Software will perform substantially as described in the -applicable Product documentation for one year from the date Customer acquires a license for that -version. If it does not and Customer notifies GitHub within the warranty term, GitHub will at its -option (a) return the price Customer paid for the Software license or (b) repair or replace the -Software. -(c) Support. GitHub warrants that it will perform Support in accordance with the applicable Support - -program. Customer’s remedies for breach of this warranty are described in the Support programs. -The remedies above are Customer’s sole remedies for breach of the warranties. Customer waives any -warranty claims not made during the warranty period. - -5.2 Exclusions. The warranties in this Agreement do not apply to problems caused by accident, abuse, or -use inconsistent with this Agreement, including failure to meet minimum system requirements. These -warranties do not apply to Previews. - -5.3 Disclaimer. Except for the limited warranties above and subject to applicable law, GitHub provides no -other warranties. It disclaims any other express, implied or statutory warranties, including warranties of -quality, title, non-infringement, merchantability, and fitness for a particular purpose. - -6 Third party claims - -6.1 The parties will defend each other against third party claims described in this section and will pay the -amount of any resulting adverse final judgment or approved settlement, but only if the defending party -is promptly notified in writing of the claim and has the right to control the defense and any settlement -of it. - -6.2 The party being defended must provide the defending party with all requested assistance, information, -and authority. The defending party will then reimburse the other party for reasonable out-of-pocket -expenses it incurs in providing such assistance. - -6.3 This section describes the parties’ sole remedies and entire liability for such claims. -(a) By GitHub. GitHub will defend Customer against any third-party claim that a Product made -available by GitHub for a fee and used within the scope of this Agreement (unmodified as provided -by GitHub and not combined with anything else), misappropriated a trade secret or directly -infringes a patent, copyright, trademark, or other proprietary right of a third party. 
If GitHub is -unable to resolve a claim of misappropriation or infringement, it may, at its option, either (1) -modify or replace the Product with a functional equivalent or (2) terminate Customer’s license and -refund any license fees, including amounts paid in advance for any usage period after the -termination date. GitHub will not be liable for any claims or damages due to Customer’s continued -use of a Product after being notified to stop due to a third-party claim. -(b) By Customer. To the extent permitted by applicable law, Customer will defend GitHub and its -Affiliates against any third-party claim that: (1) any Customer Content misappropriated a trade -secret or directly infringes a patent, copyright, trademark, or other proprietary right of a third -party; or (2) Customer’s use of any Product, alone or in combination with anything else, violates -the law or harms a third party. - -7 Limitation of liability - -7.1 Each party’s maximum, aggregate liability to the other under this Agreement is limited to direct -damages finally awarded in an amount not to exceed the following: -(a) Products. For Products ordered on a subscription basis, GitHub’s maximum liability to Customer for -any incident giving rise to a claim will not exceed the amount Customer paid for the Product during -the 12 months before the incident. -(b) Previews. For Previews, GitHub’s maximum liability is limited to US $5,000. -(c) Exclusions. In no event will either party be liable for indirect, incidental, special, punitive, or -consequential damages, or loss of use, loss of profits, or interruption of business; however caused -or on any theory of liability. -(d) Exceptions. No limitation or exclusions will apply to liability arising out of either party’s (1) -confidentiality obligations (except for liability related to Customer Content, which is subject to the -above limitation for Products); (2) defense obligations above; or (3) violation of the other party’s -intellectual property rights. - -8 Pricing and payment - -8.1 Fees. Customer agrees to pay fees in full, up front and, if invoiced, within thirty (30) days of the invoice -date. Amounts payable are non-refundable, except as stated in this Agreement regarding Product -warranty and third-party claims. If billed based on usage, GitHub will invoice according to the billing -model described in the Product documentation. - -8.2 Late payment. If Customer fails to pay fees on time, GitHub has the right to charge 2% monthly interest -on past due amounts as allowed by law. GitHub also has the right to charge Customer for all expenses of -recovery, to terminate the applicable order, turn off access and to take any other action at law. - -8.3 Taxes. Customer is solely responsible for all taxes, fees, duties and governmental assessments (except -for taxes based on GitHub’s net income) that are imposed or become due in connection with this -Agreement. If any taxes are required to be withheld on payments invoiced by GitHub, Customer may -deduct such taxes from the amount owed and pay them to the appropriate taxing authority, but only if -Customer promptly provides GitHub an official receipt for those withholdings and other documents -reasonably requested to allow GitHub to claim a foreign tax credit or refund. Customer will ensure that -any taxes withheld are minimized to the extent possible under applicable law. - -9 Term and termination - -9.1 Term. This Agreement is effective until terminated by a party, as described below. - -9.2 Termination without cause. 
Either party may terminate this Agreement without cause on 30 days’ -notice. Licenses granted on a subscription basis will continue for the duration of the Subscription Term, -subject to the terms of this Agreement. - -9.3 Termination for cause. Without limiting other remedies, either party may terminate this Agreement for -material breach immediately if the other party fails to cure a curable breach within a 30-day notice -period. Upon such termination: -(a) All licenses granted under this Agreement will terminate immediately. -(b) All amounts due under any unpaid invoices will become due and payable immediately. -(c) If GitHub is in breach, Customer will be reimbursed for any prepaid unused fees. - -9.4 Migration. Customer may migrate or request migration of the data in its repositories for up to ninety -(90) days after termination of this Agreement. Customer may not use the Products on a production -basis during that time. - -9.5 Termination for regulatory reasons. GitHub may modify, discontinue, or terminate a Product in any -country or jurisdiction where there is any current or future government regulation, obligation, or other -requirement, that (1) is not generally applicable to businesses operating there; (2) presents a hardship -for GitHub to continue offering the Product without modification; or (3) causes GitHub to believe these -terms or the Product may conflict with any such regulation, obligation, or requirement. If GitHub -terminates a subscription for regulatory reasons, Customer will receive, as its sole remedy, a -reimbursement for any prepaid, unused subscription fees. - -10 Miscellaneous - -10.1 Independent contractors. The parties are independent contractors. Customer and GitHub may develop -products independently without using the other’s Confidential Information. - -10.2 Amendments. GitHub may require Customer to accept revised or additional terms before processing a -new order. Any additional or conflicting terms and conditions presented by Customer are expressly -rejected and will not apply. - -10.3 Order of precedence. Conflicting terms in the Product Specific Terms take precedence over these -General Terms as to the applicable Products. The parties may agree on changes to Section 8 Pricing and -payment and Section 10.11 Law and venue by a signed order form. Other than that, these General -Terms will take precedence over any conflicting terms in other documents. - -10.4 Assignment. Either party may assign this Agreement to an Affiliate but it must notify the other party in -writing of the assignment. GitHub may also assign its rights to receive payment and enforce Customer’s -payment obligations. Any other assignment of this Agreement must be approved by the other party in -writing. Such notification to GitHub shall be made to the account manager at GitHub. Any attempted -assignment without required approval will be void. - -10.5 Compliance with trade laws. The parties acknowledge that the Products may be subject to U.S. and -other countries’ export jurisdictions. Each party will comply with all laws and regulations applicable to -the import or export of the Products, including, but not limited to, the U.S. Export Administration -Regulations, International Traffic in Arms Regulations, and sanctions regulations administered by the U.S. -Office of Foreign Assets Control (“trade laws”). Customer will not take any action that causes GitHub to -violate U.S. or other applicable trade laws. 
If Customer learns of a potential violation of trade laws -relating to the performance of this Agreement, or a potential violation of the terms in this subsection, it -will alert GitHub as soon as possible, but in no event more than 14 days after acquiring this knowledge. -GitHub may suspend or terminate this Agreement to the extent that it reasonably concludes that -performance would cause it to violate U.S. or other applicable trade laws, including those described -above, or put it at risk of becoming the subject of economic sanctions under such trade laws. - -10.6 Severability. If any part of this Agreement is held to be unenforceable, the rest of the Agreement will -remain in full force and effect. - -10.7 Waiver. Failure to enforce any provision of this Agreement will not constitute a waiver. Any waiver -must be in writing and signed by the waiving party. - -10.8 No third-party beneficiaries. This Agreement does not create any third-party beneficiary rights except -as expressly provided by its terms. - -10.9 Survival. All provisions survive termination of this Agreement except those requiring performance only -during the term of the Agreement. - -10.10 Notices. Notices to GitHub may be submitted via email to legal@support.github.com. If Customer -wishes to formally serve notice on GitHub, it must be made through GitHub’s registered agent: -GitHub, Inc. -c/o Corporation Service Company -2710 Gateway Oaks Drive, Suite 150N -Sacramento, CA 95833-3505 -Notices must be in writing and will be treated as delivered on the date received at the address, date -shown on the return receipt, email transmission date, or date on the courier confirmation of delivery. -Notices to Customer will be sent to the individual at the address Customer identifies on its account as -its contact for notices. GitHub may send notices and other information to Customer by email or other -electronic form. - -10.11 Applicable law and venue. This Agreement will be governed by and construed in accordance with the -laws of the State of California and federal laws of the United States. Any legal action or proceeding will -be brought exclusively in the federal or state courts located in the Northern District of California. The -parties consent to personal jurisdiction and venue there. -If Customer’s principal office is within the European Union, European Economic Area or Switzerland, -however, this Agreement will be governed by the laws of Ireland. Any legal action or proceeding will -be brought exclusively in the courts located in Dublin. The parties consent to personal jurisdiction -and venue there. -The above choices of venue do not prevent either party from seeking injunctive relief in any -jurisdiction with respect to a violation of intellectual property rights or confidentiality obligations. -The 1980 United Nations Convention on Contracts for the International Sale of Goods and its related -instruments will not apply to this Agreement. - -10.12 GitHub Affiliates and contractors. GitHub may perform its obligations under this Agreement through its -Affiliates and use contractors to provide certain services. GitHub remains responsible for their -performance. - -10.13 U.S. Public Sector Amendment. The U.S. Public Sector Amendment applies if you are a Government -Entity (as defined in that amendment). - -10.14 Government procurement rules.
By accepting this Agreement, Customer represents and warrants that -(1) it has complied and will comply with all applicable government procurement laws and regulations; -(2) it is authorized to enter into this Agreement; and (3) this Agreement satisfies all applicable -procurement requirements. - -11 Definitions - -“Affiliate” means any legal entity that controls, is controlled by, or is under common control with a -party. In this context control means ownership of more than a 50% interest in an entity. - -“Content” means text, data, software, images and any other materials that are displayed or otherwise -made available through the Online Service. - -“Customer” means the entity that has entered into this Agreement. - -“Customer Content” means Content that Customer creates, owns, or to which Customer holds the -rights. - -“End User” means any person or machine account that Customer permits to use a Product or access -Customer Content. - -“Feedback” means a comment or suggestion volunteered by a party about the other party’s business, -products or services. - -“GitHub” means GitHub, Inc. - -“Online Service” means the GitHub-hosted service to which Customer may subscribe under this -Agreement. - -“Previews” means Products provided for preview, evaluation, demonstration or trial purposes, or prerelease versions of the Products. - -“Product” means all Software, Online Services and Additional Products and Features that GitHub offers, -including Previews, updates, patches, bug fixes and support provided by GitHub. - -“Product Specific Terms” means the additional product terms that apply to Products available under -this Agreement. The Product Specific Terms are provided at github.com/enterprise-legal. - -“Representatives” means a party’s employees, Affiliates, contractors, advisors and consultants. - -“SLA” means GitHub Online Services SLA, available at github.com/enterprise-legal, which specifies the -minimum service level for the Online Services. - -“Software” means licensed copies of the on-premises software GitHub Enterprise Server identified in -the Product Specific Terms, including any generally available updates of the Software. - -“Subscription License” means the license assigned to an End User. - -“Subscription Term” means the license period agreed between the parties when the Products are -ordered. - -“Support” means GitHub’s support programs described on github.com/support-enterprise. - -“U.S. Public Sector Amendment” means the U.S. Public Sector Amendment available at -github.com/enterprise-legal. diff --git a/docs/licenses/gitlab.txt b/docs/licenses/gitlab.txt deleted file mode 100644 index 4aeb38fcdf..0000000000 --- a/docs/licenses/gitlab.txt +++ /dev/null @@ -1,30 +0,0 @@ -https://gitlab.com/gitlab-org/gitlab-foss/blob/master/LICENSE - -Copyright (c) 2011-present GitLab B.V. - -Portions of this software are licensed as follows: - -* All content residing under the "doc/" directory of this repository is licensed under "Creative Commons: CC BY-SA 4.0 license". -* All content that resides under the "ee/" directory of this repository, if that directory exists, is licensed under the license defined in "ee/LICENSE". -* All content that resides under the "jh/" directory of this repository, if that directory exists, is licensed under the license defined in "jh/LICENSE". -* All client-side JavaScript (when served directly or after being compiled, arranged, augmented, or combined), is licensed under the "MIT Expat" license. 
-* All third party components incorporated into the GitLab Software are licensed under the original license provided by the owner of the applicable component. -* Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/google-cloud-sdk.txt b/docs/licenses/google-cloud-sdk.txt deleted file mode 100644 index 9252b5b5b4..0000000000 --- a/docs/licenses/google-cloud-sdk.txt +++ /dev/null @@ -1,22 +0,0 @@ -$ tar --extract --gunzip --to-stdout --file google-cloud-cli-404.0.0-darwin-x86_64.tar.gz google-cloud-sdk/LICENSE | cat - -The Google Cloud CLI and its source code are licensed under Apache -License v. 2.0 (the "License"), unless otherwise specified by an alternate -license file. - -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Note that if you use the Google Cloud CLI with any Google Cloud Platform -products, your use is additionally going to be governed by the license agreement -or terms of service, as applicable, of the underlying Google Cloud Platform -product with which you are using the Google Cloud CLI. For example, if you are -using the Google Cloud CLI with Google App Engine, your use would additionally -be governed by the Google App Engine Terms of Service. - -This also means that if you were to create works that call Google APIs, you -would still need to agree to the terms of service (usually, Google's -Developer Terms of Service at https://developers.google.com/terms) for those -APIs separately, as this code does not grant you any special rights to use -the services. diff --git a/docs/licenses/grafana.txt b/docs/licenses/grafana.txt deleted file mode 100644 index a68d5d690d..0000000000 --- a/docs/licenses/grafana.txt +++ /dev/null @@ -1,47 +0,0 @@ -https://grafana.com/legal/grafana-labs-license/?plcmt=footer - -Grafana Labs License Agreement - -PLEASE READ CAREFULLY: THIS GRAFANA LABS LICENSE AGREEMENT (THIS “AGREEMENT”), WHICH CONSTITUTES A LEGALLY BINDING AGREEMENT AND GOVERNS ALL OF YOUR USE OF ALL OF THE GRAFANA LABS SOFTWARE WITH WHICH THIS AGREEMENT IS INCLUDED (“GRAFANA LABS SOFTWARE”) THAT IS PROVIDED IN OBJECT CODE FORMAT. BY INSTALLING OR USING ANY OF THE GRAFANA LABS SOFTWARE GOVERNED BY THIS AGREEMENT, SUCH AS THE FREE VERSION OF GRAFANA ENTERPRISE, GRAFANA ENTERPRISE METRICS, OR GRAFANA ENTERPRISE LOGS, YOU ARE ASSENTING TO THE TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE WITH SUCH TERMS AND CONDITIONS, YOU MAY NOT INSTALL OR USE THE GRAFANA LABS SOFTWARE GOVERNED BY THIS AGREEMENT. IF YOU ARE INSTALLING OR USING THE GRAFANA LABS SOFTWARE ON BEHALF OF A LEGAL ENTITY, YOU REPRESENT AND WARRANT THAT YOU HAVE THE ACTUAL AUTHORITY TO AGREE TO THE TERMS AND CONDITIONS OF THIS AGREEMENT ON BEHALF OF SUCH ENTITY. Posted Date: August 5, 2021. This Agreement is entered into by and between Raintank, Inc. dba Grafana Labs (“Grafana Labs”) and You, or the legal entity on behalf of whom You are acting (as applicable, “You”). - -1 OBJECT CODE END USER LICENSES, RESTRICTIONS AND THIRD PARTY OPEN SOURCE SOFTWARE - -1.1 Object Code End User License. Subject to the terms and conditions of Section 1.2 of this Agreement, Grafana Labs hereby grants to You, AT NO CHARGE and for so long as you are not in breach of any provision of this Agreement, a License to the free features and functions of the Grafana Enterprise, Grafana Enterprise Metrics, and/or Grafana Enterprise Logs software, as applicable. - -1.2 Reservation of Rights; Restrictions. As between Grafana Labs and You, Grafana Labs and its licensors own all right, title and interest in and to the Grafana Labs Software, and any related documentation or other intellectual property rights, and except as expressly set forth in Section 1.1 of this Agreement, no other license to the Grafana Labs Software is granted to You under this Agreement, by implication, estoppel or otherwise. You agree not to: (i) reverse engineer or decompile, decrypt, disassemble or otherwise reduce any Grafana Labs Software provided to You in Object Code, or any portion thereof, to Source Code, except and only to the extent any such restriction is prohibited by applicable law, or otherwise build a competitive product or service, build a product or service using similar ideas, features, functions or graphics, or create any compilations or derivative works thereof, (ii) except as expressly permitted in Section 1.1 above, transfer, sell, rent, lease, distribute, sublicense, loan or otherwise transfer or commercially exploit or make available, Grafana Labs Software Object Code, in whole or in part, to any third party; (iii) use Grafana Labs Software Object Code for providing time-sharing services, any software-as-a-service, service bureau services or as part of an application services provider or other service offering (collectively, “SaaS Offering”) where obtaining access to the Grafana Labs Software or the features and functions of the Grafana Labs Software is a primary reason or substantial motivation for users of the SaaS Offering to access and/or use the SaaS Offering (“Prohibited SaaS Offering”); (iv) circumvent the limitations on use of Grafana Labs Software provided to You in Object Code format that are imposed or preserved by any License Key, (v) alter or remove any Marks and Notices in the Grafana Labs Software, or (vi) violate any of Grafana Labs’ posted policies regarding its Marks and Notices, including its Trademark Usage Policy available at: https://grafana.com/trademark-policy/. If You have any question as to whether a specific SaaS Offering constitutes a Prohibited SaaS Offering, or are interested in obtaining Grafana Labs’s permission to engage in commercial or non-commercial distribution of the Grafana Labs Software, please contact Grafana Labs at sales@grafana.com. - -1.3 Third Party Open Source Software.
The Grafana Labs Software may contain or be provided with third party open source libraries, components, utilities and other open source software (collectively, “Open Source Software”), which Open Source Software may have applicable license terms as identified on a website designated by Grafana Labs. Notwithstanding anything to the contrary herein, use of the Open Source Software shall be subject to the license terms and conditions applicable to such Open Source Software, to the extent required by the applicable licensor (which terms shall not restrict the license rights granted to You hereunder, but may contain additional rights). To the extent any condition of this Agreement conflicts with any license to the Open Source Software, the Open Source Software license will govern with respect to such Open Source Software only. Grafana Labs may also separately provide you with certain open source software that is licensed by Grafana Labs. Your use of such Grafana Labs open source software will not be governed by this Agreement, but by the applicable open source license terms. - -2 TERMINATION - -2.1 Termination. This Agreement will automatically terminate, whether or not You receive notice of such termination from Grafana Labs, if You breach any of its provisions. - -2.2 Post Termination. Upon any termination of this Agreement, for any reason, You shall promptly cease the use of the commercial Grafana Labs Software in Object Code format. For the avoidance of doubt, termination of this Agreement will not affect Your right to use Grafana Labs Software, in either Object Code or Source Code formats, made available under the AGPLv3 License. - -2.3 Survival. Sections 1.2, 2.2, 2.3, 3, 4, 5, and 6 shall survive any termination or expiration of this Agreement. - -3 DISCLAIMER OF WARRANTIES AND LIMITATION OF LIABILITY - -3.1 Disclaimer of Warranties. TO THE MAXIMUM EXTENT PERMITTED UNDER APPLICABLE LAW, THE GRAFANA LABS SOFTWARE IS PROVIDED “AS IS” WITHOUT WARRANTY OF ANY KIND, AND GRAFANA LABS AND ITS LICENSORS MAKE NO WARRANTIES WHETHER EXPRESSED, IMPLIED OR STATUTORY REGARDING OR RELATING TO THE GRAFANA LABS SOFTWARE. TO THE MAXIMUM EXTENT PERMITTED UNDER APPLICABLE LAW, GRAFANA LABS AND ITS LICENSORS SPECIFICALLY DISCLAIM ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, AS WELL AS ANY WARRANTIES OF REGULATORY COMPLIANCE, PERFORMANCE, ACCURACY, RELIABILITY, TITLE, AND NON-INFRINGEMENT. FURTHER, GRAFANA LABS DOES NOT WARRANT THAT THE GRAFANA LABS SOFTWARE WILL BE ERROR FREE OR UNINTERRUPTED. - -3.2 Limitation of Liability. IN NO EVENT SHALL GRAFANA LABS OR ITS LICENSORS BE LIABLE TO YOU OR ANY THIRD PARTY FOR ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE, OR CONSEQUENTIAL DAMAGES, INCLUDING BUT NOT LIMITED TO, LOSS OF PROFITS, LOSS OF USE, LOSS OF DATA, BUSINESS INTERRUPTION, COST OF SUBSTITUTE GOODS OR SERVICES, OR OTHER COMMERCIAL DAMAGES OR LOSSES ARISING OUT OF OR IN CONNECTION WITH THIS AGREEMENT, HOWEVER CAUSED AND WHETHER IN CONTRACT, TORT OR UNDER ANY OTHER THEORY OF LIABILITY AND WHETHER OR NOT YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -4 MISCELLANEOUS This Agreement completely and exclusively states the entire agreement of the parties regarding the subject matter herein, and it supersedes, and its terms govern, all prior proposals, agreements, or other communications between the parties, oral or written, regarding such subject matter.
This Agreement may be modified by Grafana Labs from time to time, and any such modifications will be effective upon the “Posted Date” set forth at the top of the modified Agreement. If any provision hereof is held unenforceable, this Agreement will continue without said provision and be interpreted to reflect the original intent of the parties. This Agreement, and any non-contractual obligation arising out of or in connection with it, is governed exclusively by New York law. This Agreement shall not be governed by the 1980 UN Convention on Contracts for the International Sale of Goods. All disputes arising out of or in connection with this Agreement, including its existence and validity, shall be resolved by the courts with jurisdiction in New York City, USA, except where mandatory law provides for the courts at another location in the United States to have jurisdiction. The parties hereby irrevocably waive any and all claims and defenses either might otherwise have in any such action or proceeding in any of such courts based upon any alleged lack of personal jurisdiction, improper venue, forum non conveniens or any similar claim or defense. A breach or threatened breach by You of Section 1 may cause irreparable harm for which damages at law may not provide adequate relief, and therefore Grafana Labs shall be entitled to seek injunctive relief without being required to post a bond. You may not assign this Agreement (including by operation of law in connection with a merger or acquisition), in whole or in part, to any third party without the prior written consent of Grafana Labs, which may be withheld or granted by Grafana Labs in its sole and absolute discretion. Any assignment in violation of the preceding sentence is void. Notices to Grafana Labs may also be sent to legal@grafana.com. - -5 VERIFICATION. You will maintain accurate records of Your use of the Grafana Labs Software sufficient to show compliance with the terms of this Agreement. On reasonable notice, Grafana Labs may audit Your use of the Grafana Labs Software to confirm Your compliance with the terms of this Agreement, provided such audit does not unreasonably interfere with Your business activities. You will reasonably cooperate with Grafana Labs and/or any third party auditor, and will, without prejudice to other rights of Grafana Labs, address any non-compliance identified by the audit within thirty (30) days after such audit. Grafana Labs may also, at any time on request, require You to furnish Grafana Labs with information necessary to verify that Your use of the Grafana Labs Software is in compliance with the terms of this Agreement. - -6 DEFINITIONS The following terms have the meanings ascribed: - -6.1 “Affiliate” means, with respect to a party, any entity that controls, is controlled by, or which is under common control with, such party, where “control” means ownership of at least fifty percent (50%) of the outstanding voting shares of the entity, or the contractual right to establish policy for, and manage the operations of, the entity. - -6.2 “Grafana Labs Software” means all of the Grafana Labs software with which this Agreement is included, including but not limited to the free features and functions of the Grafana Enterprise, Grafana Enterprise Metrics, and Grafana Enterprise Logs software.
- -6.3 “License” means a limited, non-exclusive, non-transferable, fully paid up, royalty free right and license, without the right to grant or authorize sublicenses, solely for Your internal business operations to (i) install and use the applicable Features and Functions of the Grafana Labs Software in Object Code, and (ii) permit Contractors and Your Affiliates to use the Grafana Labs software as set forth in (i) above, provided that such use by Contractors must be solely for Your benefit and/or the benefit of Your Affiliates, and You shall be responsible for all acts and omissions of such Contractors and Affiliates in connection with their use of the Grafana Labs software that are contrary to the terms and conditions of this Agreement. - -6.4 “License Key” means a sequence of bytes, including but not limited to a JSON blob, that is used to enable certain features and functions of the Grafana Labs Software. - -6.5 “Marks and Notices” means all Grafana Labs trademarks, trade names, logos and notices, including those present on the documentation as provided by Grafana Labs. - -6.6 “Object Code” means any form resulting from mechanical transformation or translation of Source Code form, including but not limited to compiled object code, generated documentation, and conversions to other media types. - -6.7 “Source Code” means the preferred form of computer software for making modifications, including but not limited to software source code, documentation source, and configuration files. diff --git a/docs/licenses/jq.txt b/docs/licenses/jq.txt deleted file mode 100644 index a815bd80d8..0000000000 --- a/docs/licenses/jq.txt +++ /dev/null @@ -1,137 +0,0 @@ -https://github.com/stedolan/jq/blob/master/COPYING - -jq is copyright (C) 2012 Stephen Dolan - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - - -jq's documentation (everything found under the docs/ subdirectory in -the source tree) is licensed under the Creative Commons CC BY 3.0 -license, which can be found at: - - https://creativecommons.org/licenses/by/3.0/ - -The documentation website includes a copy of Twitter's Bootstrap and -relies on Bonsai, Liquid templates and various other projects; look -them up for detailed licensing conditions. - - - -jq incorporates David M. Gay's dtoa.c and g_fmt.c, which bear the -following notices: - -dtoa.c: -The author of this software is David M. Gay. - -Copyright (c) 1991, 2000, 2001 by Lucent Technologies.
- -Permission to use, copy, modify, and distribute this software for any -purpose without fee is hereby granted, provided that this entire notice -is included in all copies of any software which is or includes a copy -or modification of this software and in all copies of the supporting -documentation for such software. - -THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED -WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY -REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY -OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - -g_fmt.c: -The author of this software is David M. Gay. - -Copyright (c) 1991, 1996 by Lucent Technologies. - -Permission to use, copy, modify, and distribute this software for any -purpose without fee is hereby granted, provided that this entire notice -is included in all copies of any software which is or includes a copy -or modification of this software and in all copies of the supporting -documentation for such software. - -THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED -WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY -REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY -OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - - - -jq uses parts of the open source C library "decNumber", which is distributed -under the following license: - - -ICU License - ICU 1.8.1 and later - -COPYRIGHT AND PERMISSION NOTICE - -Copyright (c) 1995-2005 International Business Machines Corporation and others -All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, and/or sell copies of the Software, and to permit persons -to whom the Software is furnished to do so, provided that the above -copyright notice(s) and this permission notice appear in all copies of -the Software and that both the above copyright notice(s) and this -permission notice appear in supporting documentation. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT -OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL -INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING -FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION -WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -Except as contained in this notice, the name of a copyright holder -shall not be used in advertising or otherwise to promote the sale, use -or other dealings in this Software without prior written authorization -of the copyright holder. - -Portions Copyright (c) 2016 Kungliga Tekniska Högskolan -(Royal Institute of Technology, Stockholm, Sweden). -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2.
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/lgtm.txt b/docs/licenses/lgtm.txt deleted file mode 100644 index 9cad059c2b..0000000000 --- a/docs/licenses/lgtm.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/lgtmco/lgtm/blob/master/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/locust.txt b/docs/licenses/locust.txt deleted file mode 100644 index 8bdd85bd16..0000000000 --- a/docs/licenses/locust.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/locustio/locust/blob/master/LICENSE - -The MIT License - -Copyright (c) 2009-2010, Carl Byström, Jonatan Heyman - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/pycharm.txt b/docs/licenses/pycharm.txt deleted file mode 100644 index 8005b83be3..0000000000 --- a/docs/licenses/pycharm.txt +++ /dev/null @@ -1,44 +0,0 @@ -https://sales.jetbrains.com/hc/en-gb/articles/115001015290-Where-can-I-find-the-EULA-End-User-License-Agreement- - -EULA for Community editions: -Community Edition is open-source, licensed under Apache 2.0. -It can be used for commercial development. - ----------------------------------------- - -https://sales.jetbrains.com/hc/en-gb/articles/206544679-What-is-our-licensing-model- - -JetBrains adopted a subscription-based licensing model for selected products on November 2, 2015. This new model allows our customers to purchase monthly and yearly subscriptions for one or more products. - -Renewing your subscription will keep it active, allowing you to continuously install and use the latest versions of the Products as they are released. Please note, our subscription-based licensing model also grants a perpetual fallback license. - -Subscriptions of at least 365 consecutive days qualify for a Perpetual Fallback License. The Perpetual Fallback License lets you keep using the subscribed-to products forever at no additional cost if you ever decide not to renew your subscription. However, the Perpetual Fallback License will be limited to the version of the product available at the time of purchase or 12 months prior. - -An All Products option is also available, providing our customers with access to all our desktop products (IDEs, utilities, and extensions). - -For JetBrains Team Tools - - - TeamCity, YouTrack Standalone, and Upsource are covered under a normal perpetual licensing model. - - YouTrack InCloud and Space are covered under a subscription licensing model. - ----------------------------------------- - -https://sales.jetbrains.com/hc/en-gb/articles/207240845 - -What is a perpetual fallback license? - -A perpetual fallback license is a license that allows you to use a specific version of software without an active subscription for it. The license also includes all bugfix updates; more specifically, in an X.Y.Z version all Z releases are included. - -When purchasing an annual subscription, you will immediately get a perpetual fallback license for the exact version available at the time. - -If paying on a monthly basis, as soon as you pay for 12 consecutive months, you will receive this perpetual fallback license providing you with access to the exact product version for when your 12 consecutive months subscription started. You will receive perpetual fallback licenses for every version you’ve paid 12 consecutive months for. - -This option is available whether you subscribe to a single product or the ‘All Products’ option. - -If you are an Administrator, the version each subscription will continue working with after it expires is shown in the "Fallback Product" column.
- -If you are a subscription license User, the version you can continue using after the subscription expires is shown at the main login page at https://account.jetbrains.com/login - -A detailed description on how to obtain and install a fallback license is available here. https://sales.jetbrains.com/hc/en-gb/articles/360001186840-How-do-I-Obtain-and-Install-my-fallback-perpetual-license- - -For related FAQ articles, please refer here. https://sales.jetbrains.com/hc/en-gb/sections/201619995-Our-Licensing-Model-and-JetBrains-Toolbox diff --git a/docs/licenses/python.txt b/docs/licenses/python.txt deleted file mode 100644 index 1384aa43ef..0000000000 --- a/docs/licenses/python.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://docs.python.org/3.12/license.html - -The MIT License - -Copyright (c) 2009-2010, Carl Byström, Jonatan Heyman - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/python/arrow.txt b/docs/licenses/python/arrow.txt deleted file mode 100644 index ad2709403e..0000000000 --- a/docs/licenses/python/arrow.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/arrow-py/arrow/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2021 Chris Smith - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/docs/licenses/python/atomicwrites.txt b/docs/licenses/python/atomicwrites.txt deleted file mode 100644 index f2fccbed48..0000000000 --- a/docs/licenses/python/atomicwrites.txt +++ /dev/null @@ -1,21 +0,0 @@ -https://github.com/untitaker/python-atomicwrites/LICENSE - -Copyright (c) 2015-2016 Markus Unterwaditzer - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/attrs.txt b/docs/licenses/python/attrs.txt deleted file mode 100644 index e9b408a77e..0000000000 --- a/docs/licenses/python/attrs.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/python-attrs/attrs/LICENSE - -The MIT License (MIT) - -Copyright (c) 2015 Hynek Schlawack and the attrs contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/aws-requests-auth.txt b/docs/licenses/python/aws-requests-auth.txt deleted file mode 100644 index fa50d7d73e..0000000000 --- a/docs/licenses/python/aws-requests-auth.txt +++ /dev/null @@ -1,29 +0,0 @@ -https://github.com/davidmuller/aws-requests-auth/LICENSE - -Copyright (c) David Muller. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. The names of its contributors may not be used to endorse or promote - products derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/docs/licenses/python/awscli.txt b/docs/licenses/python/awscli.txt deleted file mode 100644 index 029d671328..0000000000 --- a/docs/licenses/python/awscli.txt +++ /dev/null @@ -1,14 +0,0 @@ -https://github.com/aws/aws-cli/LICENSE.txt - -Copyright 2012-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"). You -may not use this file except in compliance with the License. A copy of -the License is located at - - http://aws.amazon.com/apache2.0/ - -or in the "license" file accompanying this file. This file is -distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -ANY KIND, either express or implied. See the License for the specific -language governing permissions and limitations under the License. diff --git a/docs/licenses/python/bagit-profile.txt b/docs/licenses/python/bagit-profile.txt deleted file mode 100644 index 0e5faa29bb..0000000000 --- a/docs/licenses/python/bagit-profile.txt +++ /dev/null @@ -1,26 +0,0 @@ -https://github.com/bagit-profiles/bagit-profiles-validator/LICENSE - -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to diff --git a/docs/licenses/python/bagit.txt b/docs/licenses/python/bagit.txt deleted file mode 100644 index 9ad2255362..0000000000 --- a/docs/licenses/python/bagit.txt +++ /dev/null @@ -1,40 +0,0 @@ -https://github.com/LibraryOfCongress/bagit-python - -License cc0 - -Note: By contributing to this project, you agree to license your work under the same terms as those that govern this project's distribution. - ---------------------------------------- -https://creativecommons.org/publicdomain/zero/1.0/legalcode - -CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. 
CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: - - - the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; - - moral rights retained by the original author(s) and/or performer(s); - - publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; - - rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; - - rights protecting the extraction, dissemination, use and reuse of data in a Work; - - database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and - - other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. - -2. Waiver. 
To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. - -4. Limitations and Disclaimers. - - - No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. - - Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. - - Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. 
Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. - - Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. - diff --git a/docs/licenses/python/blessed.txt b/docs/licenses/python/blessed.txt deleted file mode 100644 index f857bc2f2b..0000000000 --- a/docs/licenses/python/blessed.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/jquast/blessed/LICENSE - -Copyright (c) 2014 Jeff Quast -Copyright (c) 2011 Erik Rose - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/blinker.txt b/docs/licenses/python/blinker.txt deleted file mode 100644 index 464bda694e..0000000000 --- a/docs/licenses/python/blinker.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/pallets-eco/blinker/LICENSE.txt - -Copyright 2010 Jason Kirtland - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/docs/licenses/python/boto3-stubs.txt b/docs/licenses/python/boto3-stubs.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/boto3-stubs.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/boto3.txt b/docs/licenses/python/boto3.txt deleted file mode 100644 index 9d14ebe729..0000000000 --- a/docs/licenses/python/boto3.txt +++ /dev/null @@ -1,179 +0,0 @@ -https://github.com/boto/boto3/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS diff --git a/docs/licenses/python/botocore-stubs.txt b/docs/licenses/python/botocore-stubs.txt deleted file mode 100644 index 0e2e0c2fb7..0000000000 --- a/docs/licenses/python/botocore-stubs.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/botocore-stubs/LICENSE - -MIT License - -Copyright (c) 2022 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/botocore.txt b/docs/licenses/python/botocore.txt deleted file mode 100644 index 7ebadad4d9..0000000000 --- a/docs/licenses/python/botocore.txt +++ /dev/null @@ -1,179 +0,0 @@ -https://github.com/boto/botocore/LICENSE.txt - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS diff --git a/docs/licenses/python/brotli.txt b/docs/licenses/python/brotli.txt deleted file mode 100644 index 158d58d105..0000000000 --- a/docs/licenses/python/brotli.txt +++ /dev/null @@ -1,21 +0,0 @@ -https://github.com/google/brotli/LICENSE - -Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/python/cachetools.txt b/docs/licenses/python/cachetools.txt deleted file mode 100644 index f5ef4e1176..0000000000 --- a/docs/licenses/python/cachetools.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/tkem/cachetools/LICENSE - -The MIT License (MIT) - -Copyright (c) 2014-2024 Thomas Kemmer - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/certifi.txt b/docs/licenses/python/certifi.txt deleted file mode 100644 index 5ad79c5b30..0000000000 --- a/docs/licenses/python/certifi.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/certifi/python-certifi/LICENSE - -This package contains a modified version of ca-bundle.crt: - -ca-bundle.crt -- Bundle of CA Root Certificates - -This is a bundle of X.509 certificates of public Certificate Authorities -(CA). These were automatically extracted from Mozilla's root certificates -file (certdata.txt). This file can be found in the mozilla source tree: -https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt -It contains the certificates in PEM format and therefore -can be directly used with curl / libcurl / php_curl, or with -an Apache+mod_ssl webserver for SSL client authentication. -Just configure this file as the SSLCACertificateFile.# - -***** BEGIN LICENSE BLOCK ***** -This Source Code Form is subject to the terms of the Mozilla Public License, -v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain -one at http://mozilla.org/MPL/2.0/. 
- -***** END LICENSE BLOCK ***** -@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $ diff --git a/docs/licenses/python/cffi.txt b/docs/licenses/python/cffi.txt deleted file mode 100644 index 7bd6d544e5..0000000000 --- a/docs/licenses/python/cffi.txt +++ /dev/null @@ -1,28 +0,0 @@ -https://github.com/python-cffi/cffi/LICENSE - - -Except when otherwise stated (look for LICENSE files in directories or -information at the beginning of each file) all software and -documentation is licensed as follows: - - The MIT License - - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation - files (the "Software"), to deal in the Software without - restriction, including without limitation the rights to use, - copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the - Software is furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - diff --git a/docs/licenses/python/chalice.txt b/docs/licenses/python/chalice.txt deleted file mode 100644 index 9a47aab2c2..0000000000 --- a/docs/licenses/python/chalice.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/aws/chalice/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/charset-normalizer.txt b/docs/licenses/python/charset-normalizer.txt deleted file mode 100644 index 5e5d4f97eb..0000000000 --- a/docs/licenses/python/charset-normalizer.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/Ousret/charset_normalizer/LICENSE - -MIT License - -Copyright (c) 2019 TAHRI Ahmed R. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/docs/licenses/python/chevron.txt b/docs/licenses/python/chevron.txt deleted file mode 100644 index 3ad88fe026..0000000000 --- a/docs/licenses/python/chevron.txt +++ /dev/null @@ -1,24 +0,0 @@ -https://github.com/noahmorrison/chevron/LICENSE - -The MIT License (MIT) - -Copyright (c) 2014 Noah Morrison - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/docs/licenses/python/click.txt b/docs/licenses/python/click.txt deleted file mode 100644 index b3b092f069..0000000000 --- a/docs/licenses/python/click.txt +++ /dev/null @@ -1,30 +0,0 @@ -https://github.com/pallets/click/LICENSE.txt - -Copyright 2014 Pallets - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/colorama.txt b/docs/licenses/python/colorama.txt deleted file mode 100644 index cc0487e0c3..0000000000 --- a/docs/licenses/python/colorama.txt +++ /dev/null @@ -1,29 +0,0 @@ -https://github.com/tartley/colorama/LICENSE.txt - -Copyright (c) 2010 Jonathan Hartley -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holders, nor those of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/configargparse.txt b/docs/licenses/python/configargparse.txt deleted file mode 100644 index d6ea06a6b2..0000000000 --- a/docs/licenses/python/configargparse.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/bw2/ConfigArgParse/LICENSE - -The MIT License (MIT) - -Copyright (c) 2015 bw2 - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/python/coverage.txt b/docs/licenses/python/coverage.txt deleted file mode 100644 index bb7a7416a1..0000000000 --- a/docs/licenses/python/coverage.txt +++ /dev/null @@ -1,179 +0,0 @@ -https://github.com/nedbat/coveragepy/LICENSE.txt - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS diff --git a/docs/licenses/python/cryptography.txt b/docs/licenses/python/cryptography.txt deleted file mode 100644 index 705b366e54..0000000000 --- a/docs/licenses/python/cryptography.txt +++ /dev/null @@ -1,5 +0,0 @@ -https://github.com/pyca/cryptography/LICENSE - -This software is made available under the terms of *either* of the licenses -found in LICENSE.APACHE or LICENSE.BSD. Contributions to cryptography are made -under the terms of *both* these licenses. diff --git a/docs/licenses/python/deprecated.txt b/docs/licenses/python/deprecated.txt deleted file mode 100644 index 56ece2ac41..0000000000 --- a/docs/licenses/python/deprecated.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/tantale/deprecated/LICENSE.rst - -The MIT License (MIT) - -Copyright (c) 2017 Laurent LAPORTE - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/docs/licenses/python/docker.txt b/docs/licenses/python/docker.txt deleted file mode 100644 index 98a5bad3ef..0000000000 --- a/docs/licenses/python/docker.txt +++ /dev/null @@ -1,193 +0,0 @@ -https://github.com/docker/docker-py/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - Copyright 2016 Docker, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/docutils.txt b/docs/licenses/python/docutils.txt deleted file mode 100644 index 995d8b772c..0000000000 --- a/docs/licenses/python/docutils.txt +++ /dev/null @@ -1,160 +0,0 @@ -https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/docutils/COPYING.txt - -================== - Copying Docutils -================== - -:Author: David Goodger -:Contact: goodger@python.org -:Date: $Date$ -:Web site: https://docutils.sourceforge.io/ -:Copyright: This document has been placed in the public domain. - -Most of the files included in this project have been placed in the -public domain, and therefore have no license requirements and no -restrictions on copying or usage; see the `Public Domain Dedication`_ -below. There are a few exceptions_, listed below. -Files in the Sandbox_ are not distributed with Docutils releases and -may have different license terms. - - -Public Domain Dedication -======================== - -The persons who have associated their work with this project (the -"Dedicator": David Goodger and the many contributors to the Docutils -project) hereby dedicate the entire copyright, less the exceptions_ -listed below, in the work of authorship known as "Docutils" identified -below (the "Work") to the public domain. - -The primary repository for the Work is the Internet World Wide Web -site . The Work consists of the -files within the "docutils" module of the Docutils project Subversion -repository (http://svn.code.sf.net/p/docutils/code/), -whose Internet web interface is located at -. 
Files dedicated to the -public domain may be identified by the inclusion, near the beginning -of each file, of a declaration of the form:: - - Copyright: This document/module/DTD/stylesheet/file/etc. has been - placed in the public domain. - -Dedicator makes this dedication for the benefit of the public at large -and to the detriment of Dedicator's heirs and successors. Dedicator -intends this dedication to be an overt act of relinquishment in -perpetuity of all present and future rights under copyright law, -whether vested or contingent, in the Work. Dedicator understands that -such relinquishment of all rights includes the relinquishment of all -rights to enforce (by lawsuit or otherwise) those copyrights in the -Work. - -Dedicator recognizes that, once placed in the public domain, the Work -may be freely reproduced, distributed, transmitted, used, modified, -built upon, or otherwise exploited by anyone for any purpose, -commercial or non-commercial, and in any way, including by methods -that have not yet been invented or conceived. - -(This dedication is derived from the text of the `Creative Commons -Public Domain Dedication`. [#]_) - -.. [#] Creative Commons has `retired this legal tool`__ and does not - recommend that it be applied to works: This tool is based on United - States law and may not be applicable outside the US. For dedicating new - works to the public domain, Creative Commons recommend the replacement - Public Domain Dedication CC0_ (CC zero, "No Rights Reserved"). So does - the Free Software Foundation in its license-list_. - - __ http://creativecommons.org/retiredlicenses - .. _CC0: http://creativecommons.org/about/cc0 - -Exceptions -========== - -The exceptions to the `Public Domain Dedication`_ above are: - -* docutils/utils/smartquotes.py - - Copyright © 2011 Günter Milde, - based on `SmartyPants`_ © 2003 John Gruber - (released under a "revised" `BSD 3-Clause License`_ included in the file) - and smartypants.py © 2004, 2007 Chad Miller. - Released under the terms of the `BSD 2-Clause License`_ - (`local copy `__). - - .. _SmartyPants: http://daringfireball.net/projects/smartypants/ - -* docutils/utils/math/latex2mathml.py - - Copyright © Jens Jørgen Mortensen, Günter Milde. - Released under the terms of the `BSD 2-Clause License`_ - (`local copy `__). - -* docutils/utils/math/math2html.py, - docutils/writers/html5_polyglot/math.css - - Copyright © 2009,2010 Alex Fernández; 2021 Günter Milde - - These files were part of eLyXer_, released under the `GNU - General Public License`_ version 3 or later. The author relicensed - them for Docutils under the terms of the `BSD 2-Clause License`_ - (`local copy `__). - - .. 
_eLyXer: https://github.com/alexfernandez/elyxer - -* docutils/__main__.py, - docutils/parsers/commonmark_wrapper.py, - docutils/parsers/recommonmark_wrapper.py, - docutils/utils/error_reporting.py, - docutils/utils/math/__init__.py, - docutils/utils/math/latex2mathml.py, - docutils/utils/math/tex2mathml_extern.py, - docutils/utils/punctuation_chars.py, - docutils/utils/smartquotes.py, - docutils/writers/html5_polyglot/__init__.py, - docutils/writers/html5_polyglot/*.css, - docutils/writers/latex2e/docutils.sty, - docutils/writers/xetex/__init__.py, - test/test_parsers/test_recommonmark/\*.py, - test/test_parsers/test_rst/test_directives/test__init__.py, - test/test_parsers/test_rst/test_directives/test_code_parsing.py, - test/test_parsers/test_rst/test_line_length_limit_default.py, - test/test_parsers/test_rst/test_line_length_limit.py, - test/test_writers/test_latex2e_misc.py, - test/transforms/test_smartquotes.py, - tools/docutils-cli.py, - tools/rst2html5.py - - Copyright © Günter Milde. - Released under the terms of the `BSD 2-Clause License`_ - (`local copy `__). - -* docutils/utils/roman.py - - copyright by Mark Pilgrim, released under the - `Python 2.1.1 license`_ (`local copy`__). - - __ licenses/python-2-1-1.txt - -* tools/editors/emacs/rst.el - - copyright by Free Software Foundation, Inc., - released under the `GNU General Public License`_ version 3 or later - (`local copy`__). - - __ licenses/gpl-3-0.txt - -All used licenses are OSI-approved_ and GPL-compatible_. - -Plaintext versions of all the linked-to licenses are provided in the -licenses_ directory. - -.. _sandbox: https://docutils.sourceforge.io/sandbox/README.html -.. _licenses: licenses/ -.. _Python 2.1.1 license: https://docs.python.org/3/license.html -.. _GNU General Public License: https://www.gnu.org/copyleft/gpl.html -.. _BSD 2-Clause License: http://opensource.org/licenses/BSD-2-Clause -.. _BSD 3-Clause License: https://opensource.org/licenses/BSD-3-Clause -.. _OSI-approved: http://opensource.org/licenses/ -.. _license-list: -.. _GPL-compatible: https://www.gnu.org/licenses/license-list.html - diff --git a/docs/licenses/python/elasticsearch-dsl.txt b/docs/licenses/python/elasticsearch-dsl.txt deleted file mode 100644 index 6426147088..0000000000 --- a/docs/licenses/python/elasticsearch-dsl.txt +++ /dev/null @@ -1,178 +0,0 @@ -https://github.com/elasticsearch/elasticsearch-dsl-py/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - diff --git a/docs/licenses/python/elasticsearch.txt b/docs/licenses/python/elasticsearch.txt deleted file mode 100644 index f01c90fe70..0000000000 --- a/docs/licenses/python/elasticsearch.txt +++ /dev/null @@ -1,178 +0,0 @@ -https://github.com/elastic/elasticsearch-py/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - diff --git a/docs/licenses/python/fastavro.txt b/docs/licenses/python/fastavro.txt deleted file mode 100644 index f2e436b4aa..0000000000 --- a/docs/licenses/python/fastavro.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/fastavro/fastavro/LICENSE - -MIT License - -Copyright (c) 2011 Miki Tebeka - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/licenses/python/flake8.txt b/docs/licenses/python/flake8.txt deleted file mode 100644 index 62f28139fc..0000000000 --- a/docs/licenses/python/flake8.txt +++ /dev/null @@ -1,24 +0,0 @@ -https://github.com/pycqa/flake8/LICENSE - -== Flake8 License (MIT) == - -Copyright (C) 2011-2013 Tarek Ziade -Copyright (C) 2012-2016 Ian Cordasco - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/flask-basicauth.txt b/docs/licenses/python/flask-basicauth.txt deleted file mode 100644 index 086bd95ab9..0000000000 --- a/docs/licenses/python/flask-basicauth.txt +++ /dev/null @@ -1,29 +0,0 @@ -https://github.com/jpvanhal/flask-basicauth/LICENSE - -Copyright (c) 2013, Janne Vanhala - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* The names of the contributors may not be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/docs/licenses/python/flask-cors.txt b/docs/licenses/python/flask-cors.txt deleted file mode 100644 index dae7873786..0000000000 --- a/docs/licenses/python/flask-cors.txt +++ /dev/null @@ -1,9 +0,0 @@ -https://github.com/corydolphin/flask-cors/LICENSE - -Copyright (C) 2016 Cory Dolphin, Olin College - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/docs/licenses/python/flask.txt b/docs/licenses/python/flask.txt deleted file mode 100644 index 0ea6fe9a7b..0000000000 --- a/docs/licenses/python/flask.txt +++ /dev/null @@ -1,30 +0,0 @@ -https://github.com/pallets/flask/LICENSE.txt - -Copyright 2010 Pallets - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/furl.txt b/docs/licenses/python/furl.txt deleted file mode 100644 index 7fc983ed07..0000000000 --- a/docs/licenses/python/furl.txt +++ /dev/null @@ -1,32 +0,0 @@ -https://github.com/gruns/furl/LICENSE.md - -Build Amazing Things. - -*** - -### Unlicense - -This is free and unencumbered software released into the public domain. 
- -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to . \ No newline at end of file diff --git a/docs/licenses/python/gevent.txt b/docs/licenses/python/gevent.txt deleted file mode 100644 index 95b041fcdc..0000000000 --- a/docs/licenses/python/gevent.txt +++ /dev/null @@ -1,27 +0,0 @@ -https://github.com/gevent/gevent/LICENSE - -MIT License - -Except when otherwise stated (look at the beginning of each file) the software -and the documentation in this project are copyrighted by: - - Denis Bilenko and the contributors, http://www.gevent.org - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/geventhttpclient.txt b/docs/licenses/python/geventhttpclient.txt deleted file mode 100644 index 3d2f17d066..0000000000 --- a/docs/licenses/python/geventhttpclient.txt +++ /dev/null @@ -1,30 +0,0 @@ -http://github.com/gwik/geventhttpclient/LICENSE.txt - -Based on llhttp, copyright Fedor Indutny, 2018. - -Python extension is copyright Antonin Amand , -licensed under the same terms. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to -deal in the Software without restriction, including without limitation the -rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -IN THE SOFTWARE. - -Note: Previous versions of gevenhttpclient used http_parser.c, which in turn -was based on src/http/ngx_http_parse.c from NGINX, copyright Igor Sysoev, -Joyent, Inc. and other Node contributors. See http://github.com/joyent/http-parser -for more information - diff --git a/docs/licenses/python/gitdb.txt b/docs/licenses/python/gitdb.txt deleted file mode 100644 index 4e7879059c..0000000000 --- a/docs/licenses/python/gitdb.txt +++ /dev/null @@ -1,44 +0,0 @@ -https://github.com/gitpython-developers/gitdb/LICENSE - -Copyright (C) 2010, 2011 Sebastian Thiel and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -* Neither the name of the GitDB project nor the names of -its contributors may be used to endorse or promote products derived -from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Additional Licenses -------------------- -The files at -gitdb/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx -and -gitdb/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack -are licensed under GNU GPL as part of the git source repository, -see http://en.wikipedia.org/wiki/Git_%28software%29 for more information. 
- -They are not required for the actual operation, which is why they are not found -in the distribution package. diff --git a/docs/licenses/python/gitpython.txt b/docs/licenses/python/gitpython.txt deleted file mode 100644 index 55d270b112..0000000000 --- a/docs/licenses/python/gitpython.txt +++ /dev/null @@ -1,31 +0,0 @@ -https://github.com/gitpython-developers/GitPython/LICENSE - -Copyright (C) 2008, 2009 Michael Trier and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -* Neither the name of the GitPython project nor the names of -its contributors may be used to endorse or promote products derived -from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/google-api-core.txt b/docs/licenses/python/google-api-core.txt deleted file mode 100644 index af9bd56811..0000000000 --- a/docs/licenses/python/google-api-core.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/python-api-core/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/google-api-python-client.txt b/docs/licenses/python/google-api-python-client.txt deleted file mode 100644 index 995e735ba5..0000000000 --- a/docs/licenses/python/google-api-python-client.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/googleapis/google-api-python-client/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/docs/licenses/python/google-auth-httplib2.txt b/docs/licenses/python/google-auth-httplib2.txt deleted file mode 100644 index 8ad149cefb..0000000000 --- a/docs/licenses/python/google-auth-httplib2.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/GoogleCloudPlatform/google-auth-library-python-httplib2/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/google-auth.txt b/docs/licenses/python/google-auth.txt deleted file mode 100644 index 3494233057..0000000000 --- a/docs/licenses/python/google-auth.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/googleapis/google-auth-library-python/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/google-cloud-bigquery-reservation.txt b/docs/licenses/python/google-cloud-bigquery-reservation.txt deleted file mode 100644 index 8cab6f17fc..0000000000 --- a/docs/licenses/python/google-cloud-bigquery-reservation.txt +++ /dev/null @@ -1,205 +0,0 @@ -https://github.com/googleapis/python-bigquery-reservation/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- diff --git a/docs/licenses/python/google-cloud-bigquery.txt b/docs/licenses/python/google-cloud-bigquery.txt deleted file mode 100644 index b7e3df3080..0000000000 --- a/docs/licenses/python/google-cloud-bigquery.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/python-bigquery/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. 
- - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/google-cloud-core.txt b/docs/licenses/python/google-cloud-core.txt deleted file mode 100644 index 6aa8a46aab..0000000000 --- a/docs/licenses/python/google-cloud-core.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/python-cloud-core/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/google-cloud-storage.txt b/docs/licenses/python/google-cloud-storage.txt deleted file mode 100644 index 697e2700c7..0000000000 --- a/docs/licenses/python/google-cloud-storage.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/python-storage/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/google-crc32c.txt b/docs/licenses/python/google-crc32c.txt deleted file mode 100644 index a2b07de157..0000000000 --- a/docs/licenses/python/google-crc32c.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/python-crc32c/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/docs/licenses/python/google-resumable-media.txt b/docs/licenses/python/google-resumable-media.txt deleted file mode 100644 index a67bedc830..0000000000 --- a/docs/licenses/python/google-resumable-media.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/google-resumable-media-python/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/googleapis-common-protos.txt b/docs/licenses/python/googleapis-common-protos.txt deleted file mode 100644 index 38a6c86134..0000000000 --- a/docs/licenses/python/googleapis-common-protos.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/python-api-common-protos/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/greenlet.txt b/docs/licenses/python/greenlet.txt deleted file mode 100644 index 7346ab7a43..0000000000 --- a/docs/licenses/python/greenlet.txt +++ /dev/null @@ -1,32 +0,0 @@ -https://github.com/python-greenlet/greenlet/LICENSE - -The following files are derived from Stackless Python and are subject to the -same license as Stackless Python: - - src/greenlet/slp_platformselect.h - files in src/greenlet/platform/ directory - -See LICENSE.PSF and http://www.stackless.com/ for details. 
- -Unless otherwise noted, the files in greenlet have been released under the -following MIT license: - -Copyright (c) Armin Rigo, Christian Tismer and contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/python/grpcio-status.txt b/docs/licenses/python/grpcio-status.txt deleted file mode 100644 index 55b69d8925..0000000000 --- a/docs/licenses/python/grpcio-status.txt +++ /dev/null @@ -1,409 +0,0 @@ -https://github.com/grpc/grpc.io/blob/main/LICENSE - -Except as otherwise noted, the content of this repository is licensed under the -Creative Commons Attribution 4.0 License [1], and code and code samples are -licensed under the Apache License, Version 2.0 [2]. - -Copyright 2015-present gRPC Authors. All rights reserved. - -[1]: https://creativecommons.org/licenses/by/4.0, a copy is included below. -[2]: https://apache.org/licenses/LICENSE-2.0 - ---- - -Creative Commons Attribution 4.0 International - -======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. - - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. 
Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. - Although not required by our licenses, you are encouraged to - respect those requests where reasonable. More considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution 4.0 International Public License ("Public License"). To the -extent this Public License may be interpreted as a contract, You are -granted the Licensed Rights in consideration of Your acceptance of -these terms and conditions, and the Licensor grants You such rights in -consideration of benefits the Licensor receives from making the -Licensed Material available under these terms and conditions. - - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - d. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - e. 
Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - f. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - g. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - h. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - i. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - j. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - k. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part; and - - b. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. 
Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties. - - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - 4. If You Share Adapted Material You produce, the Adapter's - License You apply must not prevent recipients of the Adapted - Material from complying with this Public License. - - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database; - - b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material; and - - c. 
You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - - -Section 7 -- Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - - -Section 8 -- Interpretation. - - a. 
For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - - -======================================================================= - -Creative Commons is not a party to its public licenses. -Notwithstanding, Creative Commons may elect to apply one of its public -licenses to material it publishes and in those instances will be -considered the “Licensor.” The text of the Creative Commons public -licenses is dedicated to the public domain under the CC0 Public Domain -Dedication. Except for the limited purpose of indicating that material -is shared under a Creative Commons public license or as otherwise -permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the public -licenses. - -Creative Commons may be contacted at creativecommons.org. - diff --git a/docs/licenses/python/grpcio.txt b/docs/licenses/python/grpcio.txt deleted file mode 100644 index 700b18b2d9..0000000000 --- a/docs/licenses/python/grpcio.txt +++ /dev/null @@ -1,612 +0,0 @@ -https://github.com/grpc/grpc/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - ------------------------------------------------------------ - -BSD 3-Clause License - -Copyright 2016, Google Inc. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from this -software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. - ------------------------------------------------------------ - -Mozilla Public License Version 2.0 -================================== - -1. Definitions --------------- - -1.1. "Contributor" - means each individual or legal entity that creates, contributes to - the creation of, or owns Covered Software. - -1.2. "Contributor Version" - means the combination of the Contributions of others (if any) used - by a Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - means Source Code Form to which the initial Contributor has attached - the notice in Exhibit A, the Executable Form of such Source Code - Form, and Modifications of such Source Code Form, in each case - including portions thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - (a) that the initial Contributor has attached the notice described - in Exhibit B to the Covered Software; or - - (b) that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the - terms of a Secondary License. - -1.6. "Executable Form" - means any form of the work other than Source Code Form. - -1.7. "Larger Work" - means a work that combines Covered Software with other material, in - a separate file or files, that is not Covered Software. - -1.8. "License" - means this document. - -1.9. "Licensable" - means having the right to grant, to the maximum extent possible, - whether at the time of the initial grant or subsequently, any and - all of the rights conveyed by this License. - -1.10. "Modifications" - means any of the following: - - (a) any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered - Software; or - - (b) any new file in Source Code Form that contains any Covered - Software. - -1.11. "Patent Claims" of a Contributor - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the - License, by the making, using, selling, offering for sale, having - made, import, or transfer of either its Contributions or its - Contributor Version. - -1.12. 
"Secondary License" - means either the GNU General Public License, Version 2.0, the GNU - Lesser General Public License, Version 2.1, the GNU Affero General - Public License, Version 3.0, or any later versions of those - licenses. - -1.13. "Source Code Form" - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that - controls, is controlled by, or is under common control with You. For - purposes of this definition, "control" means (a) the power, direct - or indirect, to cause the direction or management of such entity, - whether by contract or otherwise, or (b) ownership of more than - fifty percent (50%) of the outstanding shares or beneficial - ownership of such entity. - -2. License Grants and Conditions --------------------------------- - -2.1. Grants - -Each Contributor hereby grants You a world-wide, royalty-free, -non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - -(b) under Patent Claims of such Contributor to make, use, sell, offer - for sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - -The licenses granted in Section 2.1 with respect to any Contribution -become effective for each Contribution on the date the Contributor first -distributes such Contribution. - -2.3. Limitations on Grant Scope - -The licenses granted in this Section 2 are the only rights granted under -this License. No additional rights or licenses will be implied from the -distribution or licensing of Covered Software under this License. -Notwithstanding Section 2.1(b) above, no patent license is granted by a -Contributor: - -(a) for any code that a Contributor has removed from Covered Software; - or - -(b) for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - -(c) under Patent Claims infringed by Covered Software in the absence of - its Contributions. - -This License does not grant any rights in the trademarks, service marks, -or logos of any Contributor (except as may be necessary to comply with -the notice requirements in Section 3.4). - -2.4. Subsequent Licenses - -No Contributor makes additional grants as a result of Your choice to -distribute the Covered Software under a subsequent version of this -License (see Section 10.2) or under the terms of a Secondary License (if -permitted under the terms of Section 3.3). - -2.5. Representation - -Each Contributor represents that the Contributor believes its -Contributions are its original creation(s) or it has sufficient rights -to grant the rights to its Contributions conveyed by this License. - -2.6. Fair Use - -This License is not intended to limit any rights You have under -applicable copyright doctrines of fair use, fair dealing, or other -equivalents. - -2.7. Conditions - -Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted -in Section 2.1. - -3. Responsibilities -------------------- - -3.1. 
Distribution of Source Form - -All distribution of Covered Software in Source Code Form, including any -Modifications that You create or to which You contribute, must be under -the terms of this License. You must inform recipients that the Source -Code Form of the Covered Software is governed by the terms of this -License, and how they can obtain a copy of this License. You may not -attempt to alter or restrict the recipients' rights in the Source Code -Form. - -3.2. Distribution of Executable Form - -If You distribute Covered Software in Executable Form then: - -(a) such Covered Software must also be made available in Source Code - Form, as described in Section 3.1, and You must inform recipients of - the Executable Form how they can obtain a copy of such Source Code - Form by reasonable means in a timely manner, at a charge no more - than the cost of distribution to the recipient; and - -(b) You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter - the recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - -You may create and distribute a Larger Work under terms of Your choice, -provided that You also comply with the requirements of this License for -the Covered Software. If the Larger Work is a combination of Covered -Software with a work governed by one or more Secondary Licenses, and the -Covered Software is not Incompatible With Secondary Licenses, this -License permits You to additionally distribute such Covered Software -under the terms of such Secondary License(s), so that the recipient of -the Larger Work may, at their option, further distribute the Covered -Software under the terms of either this License or such Secondary -License(s). - -3.4. Notices - -You may not remove or alter the substance of any license notices -(including copyright notices, patent notices, disclaimers of warranty, -or limitations of liability) contained within the Source Code Form of -the Covered Software, except that You may alter any license notices to -the extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - -You may choose to offer, and to charge a fee for, warranty, support, -indemnity or liability obligations to one or more recipients of Covered -Software. However, You may do so only on Your own behalf, and not on -behalf of any Contributor. You must make it absolutely clear that any -such warranty, support, indemnity, or liability obligation is offered by -You alone, and You hereby agree to indemnify every Contributor for any -liability incurred by such Contributor as a result of warranty, support, -indemnity or liability terms You offer. You may include additional -disclaimers of warranty and limitations of liability specific to any -jurisdiction. - -4. Inability to Comply Due to Statute or Regulation ---------------------------------------------------- - -If it is impossible for You to comply with any of the terms of this -License with respect to some or all of the Covered Software due to -statute, judicial order, or regulation then You must: (a) comply with -the terms of this License to the maximum extent possible; and (b) -describe the limitations and the code they affect. Such description must -be placed in a text file included with all distributions of the Covered -Software under this License. 
Except to the extent prohibited by statute -or regulation, such description must be sufficiently detailed for a -recipient of ordinary skill to be able to understand it. - -5. Termination --------------- - -5.1. The rights granted under this License will terminate automatically -if You fail to comply with any of its terms. However, if You become -compliant, then the rights granted under this License from a particular -Contributor are reinstated (a) provisionally, unless and until such -Contributor explicitly and finally terminates Your grants, and (b) on an -ongoing basis, if such Contributor fails to notify You of the -non-compliance by some reasonable means prior to 60 days after You have -come back into compliance. Moreover, Your grants from a particular -Contributor are reinstated on an ongoing basis if such Contributor -notifies You of the non-compliance by some reasonable means, this is the -first time You have received notice of non-compliance with this License -from such Contributor, and You become compliant prior to 30 days after -Your receipt of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent -infringement claim (excluding declaratory judgment actions, -counter-claims, and cross-claims) alleging that a Contributor Version -directly or indirectly infringes any patent, then the rights granted to -You by any and all Contributors for the Covered Software under Section -2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all -end user license agreements (excluding distributors and resellers) which -have been validly granted by You or Your distributors under this License -prior to termination shall survive termination. - -************************************************************************ -* * -* 6. Disclaimer of Warranty * -* ------------------------- * -* * -* Covered Software is provided under this License on an "as is" * -* basis, without warranty of any kind, either expressed, implied, or * -* statutory, including, without limitation, warranties that the * -* Covered Software is free of defects, merchantable, fit for a * -* particular purpose or non-infringing. The entire risk as to the * -* quality and performance of the Covered Software is with You. * -* Should any Covered Software prove defective in any respect, You * -* (not any Contributor) assume the cost of any necessary servicing, * -* repair, or correction. This disclaimer of warranty constitutes an * -* essential part of this License. No use of any Covered Software is * -* authorized under this License except under this disclaimer. * -* * -************************************************************************ - -************************************************************************ -* * -* 7. Limitation of Liability * -* -------------------------- * -* * -* Under no circumstances and under no legal theory, whether tort * -* (including negligence), contract, or otherwise, shall any * -* Contributor, or anyone who distributes Covered Software as * -* permitted above, be liable to You for any direct, indirect, * -* special, incidental, or consequential damages of any character * -* including, without limitation, damages for lost profits, loss of * -* goodwill, work stoppage, computer failure or malfunction, or any * -* and all other commercial damages or losses, even if such party * -* shall have been informed of the possibility of such damages. 
This * -* limitation of liability shall not apply to liability for death or * -* personal injury resulting from such party's negligence to the * -* extent applicable law prohibits such limitation. Some * -* jurisdictions do not allow the exclusion or limitation of * -* incidental or consequential damages, so this exclusion and * -* limitation may not apply to You. * -* * -************************************************************************ - -8. Litigation -------------- - -Any litigation relating to this License may be brought only in the -courts of a jurisdiction where the defendant maintains its principal -place of business and such litigation shall be governed by laws of that -jurisdiction, without reference to its conflict-of-law provisions. -Nothing in this Section shall prevent a party's ability to bring -cross-claims or counter-claims. - -9. Miscellaneous ----------------- - -This License represents the complete agreement concerning the subject -matter hereof. If any provision of this License is held to be -unenforceable, such provision shall be reformed only to the extent -necessary to make it enforceable. Any law or regulation which provides -that the language of a contract shall be construed against the drafter -shall not be used to construe this License against a Contributor. - -10. Versions of the License ---------------------------- - -10.1. New Versions - -Mozilla Foundation is the license steward. Except as provided in Section -10.3, no one other than the license steward has the right to modify or -publish new versions of this License. Each version will be given a -distinguishing version number. - -10.2. Effect of New Versions - -You may distribute the Covered Software under the terms of the version -of the License under which You originally received the Covered Software, -or under the terms of any subsequent version published by the license -steward. - -10.3. Modified Versions - -If you create software not governed by this License, and you want to -create a new license for such software, you may create and use a -modified version of this License if you rename the license and remove -any references to the name of the license steward (except to note that -such modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary -Licenses - -If You choose to distribute Source Code Form that is Incompatible With -Secondary Licenses under the terms of this version of the License, the -notice described in Exhibit B of this License must be attached. - -Exhibit A - Source Code Form License Notice -------------------------------------------- - - This Source Code Form is subject to the terms of the Mozilla Public - License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular -file, then You may include the notice in a location (such as a LICENSE -file in a relevant directory) where a recipient would be likely to look -for such a notice. - -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice ---------------------------------------------------------- - - This Source Code Form is "Incompatible With Secondary Licenses", as - defined by the Mozilla Public License, v. 2.0. 
diff --git a/docs/licenses/python/http-message-signatures.txt b/docs/licenses/python/http-message-signatures.txt deleted file mode 100644 index 92ab5ffe4c..0000000000 --- a/docs/licenses/python/http-message-signatures.txt +++ /dev/null @@ -1,193 +0,0 @@ -https://github.com/pyauth/http-message-signatures/LICENSE - -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, and -distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by the copyright -owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all other entities -that control, are controlled by, or are under common control with that entity. -For the purposes of this definition, "control" means (i) the power, direct or -indirect, to cause the direction or management of such entity, whether by -contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity exercising -permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, including -but not limited to software source code, documentation source, and configuration -files. - -"Object" form shall mean any form resulting from mechanical transformation or -translation of a Source form, including but not limited to compiled object code, -generated documentation, and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or Object form, made -available under the License, as indicated by a copyright notice that is included -in or attached to the work (an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object form, that -is based on (or derived from) the Work and for which the editorial revisions, -annotations, elaborations, or other modifications represent, as a whole, an -original work of authorship. For the purposes of this License, Derivative Works -shall not include works that remain separable from, or merely link (or bind by -name) to the interfaces of, the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including the original version -of the Work and any modifications or additions to that Work or Derivative Works -thereof, that is intentionally submitted to Licensor for inclusion in the Work -by the copyright owner or by an individual or Legal Entity authorized to submit -on behalf of the copyright owner. For the purposes of this definition, -"submitted" means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, and -issue tracking systems that are managed by, or on behalf of, the Licensor for -the purpose of discussing and improving the Work, but excluding communication -that is conspicuously marked or otherwise designated in writing by the copyright -owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity on behalf -of whom a Contribution has been received by Licensor and subsequently -incorporated within the Work. - -2. Grant of Copyright License. 
- -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the Work and such -Derivative Works in Source or Object form. - -3. Grant of Patent License. - -Subject to the terms and conditions of this License, each Contributor hereby -grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, -irrevocable (except as stated in this section) patent license to make, have -made, use, offer to sell, sell, import, and otherwise transfer the Work, where -such license applies only to those patent claims licensable by such Contributor -that are necessarily infringed by their Contribution(s) alone or by combination -of their Contribution(s) with the Work to which such Contribution(s) was -submitted. If You institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work or a -Contribution incorporated within the Work constitutes direct or contributory -patent infringement, then any patent licenses granted to You under this License -for that Work shall terminate as of the date such litigation is filed. - -4. Redistribution. - -You may reproduce and distribute copies of the Work or Derivative Works thereof -in any medium, with or without modifications, and in Source or Object form, -provided that You meet the following conditions: - -You must give any other recipients of the Work or Derivative Works a copy of -this License; and -You must cause any modified files to carry prominent notices stating that You -changed the files; and -You must retain, in the Source form of any Derivative Works that You distribute, -all copyright, patent, trademark, and attribution notices from the Source form -of the Work, excluding those notices that do not pertain to any part of the -Derivative Works; and -If the Work includes a "NOTICE" text file as part of its distribution, then any -Derivative Works that You distribute must include a readable copy of the -attribution notices contained within such NOTICE file, excluding those notices -that do not pertain to any part of the Derivative Works, in at least one of the -following places: within a NOTICE text file distributed as part of the -Derivative Works; within the Source form or documentation, if provided along -with the Derivative Works; or, within a display generated by the Derivative -Works, if and wherever such third-party notices normally appear. The contents of -the NOTICE file are for informational purposes only and do not modify the -License. You may add Your own attribution notices within Derivative Works that -You distribute, alongside or as an addendum to the NOTICE text from the Work, -provided that such additional attribution notices cannot be construed as -modifying the License. -You may add Your own copyright statement to Your modifications and may provide -additional or different license terms and conditions for use, reproduction, or -distribution of Your modifications, or for any such Derivative Works as a whole, -provided Your use, reproduction, and distribution of the Work otherwise complies -with the conditions stated in this License. - -5. Submission of Contributions. 
- -Unless You explicitly state otherwise, any Contribution intentionally submitted -for inclusion in the Work by You to the Licensor shall be under the terms and -conditions of this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify the terms of -any separate license agreement you may have executed with Licensor regarding -such Contributions. - -6. Trademarks. - -This License does not grant permission to use the trade names, trademarks, -service marks, or product names of the Licensor, except as required for -reasonable and customary use in describing the origin of the Work and -reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. - -Unless required by applicable law or agreed to in writing, Licensor provides the -Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, -including, without limitation, any warranties or conditions of TITLE, -NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are -solely responsible for determining the appropriateness of using or -redistributing the Work and assume any risks associated with Your exercise of -permissions under this License. - -8. Limitation of Liability. - -In no event and under no legal theory, whether in tort (including negligence), -contract, or otherwise, unless required by applicable law (such as deliberate -and grossly negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, incidental, -or consequential damages of any character arising as a result of this License or -out of the use or inability to use the Work (including but not limited to -damages for loss of goodwill, work stoppage, computer failure or malfunction, or -any and all other commercial damages or losses), even if such Contributor has -been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. - -While redistributing the Work or Derivative Works thereof, You may choose to -offer, and charge a fee for, acceptance of support, warranty, indemnity, or -other liability obligations and/or rights consistent with this License. However, -in accepting such obligations, You may act only on Your own behalf and on Your -sole responsibility, not on behalf of any other Contributor, and only if You -agree to indemnify, defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason of your -accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work - -To apply the Apache License to your work, attach the following boilerplate -notice, with the fields enclosed by brackets "[]" replaced with your own -identifying information. (Don't include the brackets!) The text should be -enclosed in the appropriate comment syntax for the file format. We also -recommend that a file or class name and description of purpose be included on -the same "printed page" as the copyright notice for easier identification within -third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/http-sfv.txt b/docs/licenses/python/http-sfv.txt deleted file mode 100644 index 60fd1995d9..0000000000 --- a/docs/licenses/python/http-sfv.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/mnot/http_sfv/LICENCE.md - -Copyright (c) 2018-2020 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - diff --git a/docs/licenses/python/http_sfv.txt b/docs/licenses/python/http_sfv.txt deleted file mode 100644 index 474f0ef626..0000000000 --- a/docs/licenses/python/http_sfv.txt +++ /dev/null @@ -1,21 +0,0 @@ -https://github.com/mnot/http_sfv/LICENCE.md - -Copyright (c) 2018-2020 Mark Nottingham - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/docs/licenses/python/httplib2.txt b/docs/licenses/python/httplib2.txt deleted file mode 100644 index c352c0b2ad..0000000000 --- a/docs/licenses/python/httplib2.txt +++ /dev/null @@ -1,25 +0,0 @@ -https://github.com/httplib2/httplib2/LICENSE - -Httplib2 Software License - -Copyright (c) 2006 by Joe Gregorio - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without restriction, -including without limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/idna.txt b/docs/licenses/python/idna.txt deleted file mode 100644 index 191eac8f84..0000000000 --- a/docs/licenses/python/idna.txt +++ /dev/null @@ -1,33 +0,0 @@ -https://github.com/kjd/idna/LICENSE.md - -BSD 3-Clause License - -Copyright (c) 2013-2024, Kim Davies and contributors. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/docs/licenses/python/importlib-metadata.txt b/docs/licenses/python/importlib-metadata.txt deleted file mode 100644 index dc1dee6b7a..0000000000 --- a/docs/licenses/python/importlib-metadata.txt +++ /dev/null @@ -1,205 +0,0 @@ -https://github.com/python/importlib_metadata/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. 
Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/docs/licenses/python/importlib-resources.txt b/docs/licenses/python/importlib-resources.txt deleted file mode 100644 index 1aeb154472..0000000000 --- a/docs/licenses/python/importlib-resources.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/python/importlib_resources/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/inquirer.txt b/docs/licenses/python/inquirer.txt deleted file mode 100644 index 322190b9e6..0000000000 --- a/docs/licenses/python/inquirer.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/magmax/python-inquirer/LICENSE - -The MIT License (MIT) - -Copyright (c) 2014 Miguel Ángel García - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/itsdangerous.txt b/docs/licenses/python/itsdangerous.txt deleted file mode 100644 index fda1ed4792..0000000000 --- a/docs/licenses/python/itsdangerous.txt +++ /dev/null @@ -1,30 +0,0 @@ -https://github.com/pallets/itsdangerous/LICENSE.txt - -Copyright 2011 Pallets - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/jdcal.txt b/docs/licenses/python/jdcal.txt deleted file mode 100644 index 31bc046ec9..0000000000 --- a/docs/licenses/python/jdcal.txt +++ /dev/null @@ -1,12 +0,0 @@ -https://github.com/phn/jdcal/LICENSE.txt - -Copyright (c) 2011, Prasanth Nair -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/docs/licenses/python/jinja2.txt b/docs/licenses/python/jinja2.txt deleted file mode 100644 index 41ff7363b7..0000000000 --- a/docs/licenses/python/jinja2.txt +++ /dev/null @@ -1,30 +0,0 @@ -https://github.com/pallets/jinja/LICENSE.txt - -Copyright 2007 Pallets - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/jmespath.txt b/docs/licenses/python/jmespath.txt deleted file mode 100644 index 8cb688b400..0000000000 --- a/docs/licenses/python/jmespath.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/jmespath/jmespath.py/LICENSE - -MIT License - -Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/jq.txt b/docs/licenses/python/jq.txt deleted file mode 100644 index 6fead34282..0000000000 --- a/docs/licenses/python/jq.txt +++ /dev/null @@ -1,24 +0,0 @@ -https://github.com/mwilliamson/jq.py/LICENSE - -Copyright (c) 2013, Michael Williamson -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/docs/licenses/python/jsonschema-spec.txt b/docs/licenses/python/jsonschema-spec.txt deleted file mode 100644 index e8a4367409..0000000000 --- a/docs/licenses/python/jsonschema-spec.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/p1c2u/jsonschema-spec/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. 
Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/jsonschema.txt b/docs/licenses/python/jsonschema.txt deleted file mode 100644 index a69b4161ad..0000000000 --- a/docs/licenses/python/jsonschema.txt +++ /dev/null @@ -1,21 +0,0 @@ -https://github.com/python-jsonschema/jsonschema/COPYING - -Copyright (c) 2013 Julian Berman - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/python/jwcrypto.txt b/docs/licenses/python/jwcrypto.txt deleted file mode 100644 index f8acf0d962..0000000000 --- a/docs/licenses/python/jwcrypto.txt +++ /dev/null @@ -1,167 +0,0 @@ -https://github.com/latchset/jwcrypto/LICENSE - - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. 
- - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. 
- - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. diff --git a/docs/licenses/python/lazy-object-proxy.txt b/docs/licenses/python/lazy-object-proxy.txt deleted file mode 100644 index ca906f681b..0000000000 --- a/docs/licenses/python/lazy-object-proxy.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/ionelmc/python-lazy-object-proxy/LICENSE - -BSD 2-Clause License - -Copyright (c) 2014-2023, Ionel Cristian Mărieș. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the -following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following -disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following -disclaimer in the documentation and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/locust.txt b/docs/licenses/python/locust.txt deleted file mode 100644 index 758d1260c0..0000000000 --- a/docs/licenses/python/locust.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/locustio/locust/LICENSE - -The MIT License - -Copyright (c) 2009-2010, Carl Byström, Jonatan Heyman - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/docs/licenses/python/markupsafe.txt b/docs/licenses/python/markupsafe.txt deleted file mode 100644 index 5a8da38607..0000000000 --- a/docs/licenses/python/markupsafe.txt +++ /dev/null @@ -1,30 +0,0 @@ -https://github.com/pallets/markupsafe/LICENSE.txt - -Copyright 2010 Pallets - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/mccabe.txt b/docs/licenses/python/mccabe.txt deleted file mode 100644 index ad02297d10..0000000000 --- a/docs/licenses/python/mccabe.txt +++ /dev/null @@ -1,27 +0,0 @@ -https://github.com/pycqa/mccabe/LICENSE - -Copyright © Ned Batchelder -Copyright © 2011-2013 Tarek Ziade -Copyright © 2013 Florent Xicluna - -Licensed under the terms of the Expat License - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation files -(the "Software"), to deal in the Software without restriction, -including without limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/more-itertools.txt b/docs/licenses/python/more-itertools.txt deleted file mode 100644 index b2e6d80e47..0000000000 --- a/docs/licenses/python/more-itertools.txt +++ /dev/null @@ -1,21 +0,0 @@ -https://github.com/more-itertools/more-itertools/LICENSE - -Copyright (c) 2012 Erik Rose - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/licenses/python/moto.txt b/docs/licenses/python/moto.txt deleted file mode 100644 index 2f15f61b9d..0000000000 --- a/docs/licenses/python/moto.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/getmoto/moto/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright 2012 Steve Pulec - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. \ No newline at end of file diff --git a/docs/licenses/python/msgpack.txt b/docs/licenses/python/msgpack.txt deleted file mode 100644 index 7f747a874c..0000000000 --- a/docs/licenses/python/msgpack.txt +++ /dev/null @@ -1,16 +0,0 @@ -https://github.com/msgpack/msgpack-python/COPYING - -Copyright (C) 2008-2011 INADA Naoki - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/docs/licenses/python/mypy-boto3-dynamodb.txt b/docs/licenses/python/mypy-boto3-dynamodb.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-dynamodb.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/licenses/python/mypy-boto3-ecr.txt b/docs/licenses/python/mypy-boto3-ecr.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-ecr.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/mypy-boto3-iam.txt b/docs/licenses/python/mypy-boto3-iam.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-iam.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/licenses/python/mypy-boto3-kms.txt b/docs/licenses/python/mypy-boto3-kms.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-kms.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/mypy-boto3-lambda.txt b/docs/licenses/python/mypy-boto3-lambda.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-lambda.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/licenses/python/mypy-boto3-s3.txt b/docs/licenses/python/mypy-boto3-s3.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-s3.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/mypy-boto3-sqs.txt b/docs/licenses/python/mypy-boto3-sqs.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-sqs.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/licenses/python/mypy-boto3-stepfunctions.txt b/docs/licenses/python/mypy-boto3-stepfunctions.txt deleted file mode 100644 index cdb67f992c..0000000000 --- a/docs/licenses/python/mypy-boto3-stepfunctions.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/mypy_boto3_builder/LICENSE - -MIT License - -Copyright (c) 2023 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/openapi-schema-validator.txt b/docs/licenses/python/openapi-schema-validator.txt deleted file mode 100644 index c6bc5c2c36..0000000000 --- a/docs/licenses/python/openapi-schema-validator.txt +++ /dev/null @@ -1,31 +0,0 @@ -https://github.com/python-openapi/openapi-schema-validator/LICENSE - -BSD 3-Clause License - -Copyright (c) 2020, A -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/docs/licenses/python/openapi-spec-validator.txt b/docs/licenses/python/openapi-spec-validator.txt deleted file mode 100644 index e6fc0a268e..0000000000 --- a/docs/licenses/python/openapi-spec-validator.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/python-openapi/openapi-spec-validator/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/orderedmultidict.txt b/docs/licenses/python/orderedmultidict.txt deleted file mode 100644 index 44c4be3486..0000000000 --- a/docs/licenses/python/orderedmultidict.txt +++ /dev/null @@ -1,33 +0,0 @@ -https://github.com/gruns/orderedmultidict/LICENSE.md - -Build Amazing Things. - -*** - -### Unlicense - -This is free and unencumbered software released into the public\ -domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or distribute\ -this software, either in source code form or as a compiled binary, for any\ -purpose, commercial or non-commercial, and by any means. - -In jurisdictions that recognize copyright laws, the author or authors of\ -this software dedicate any and all copyright interest in the software to the\ -public domain. We make this dedication for the benefit of the public at\ -large and to the detriment of our heirs and successors. We intend this\ -dedication to be an overt act of relinquishment in perpetuity of all\ -present and future rights to this software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF\ -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED\ -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\ -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT\ -SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR\ -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT\ -OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION\ -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\ -SOFTWARE. - -For more information, please refer to <http://unlicense.org/> diff --git a/docs/licenses/python/packaging.txt b/docs/licenses/python/packaging.txt deleted file mode 100644 index c057147a95..0000000000 --- a/docs/licenses/python/packaging.txt +++ /dev/null @@ -1,5 +0,0 @@ -https://github.com/pypa/packaging/LICENSE - -This software is made available under the terms of *either* of the licenses -found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made -under the terms of *both* these licenses. diff --git a/docs/licenses/python/pathable.txt b/docs/licenses/python/pathable.txt deleted file mode 100644 index 19c6da45ab..0000000000 --- a/docs/licenses/python/pathable.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/p1c2u/pathable/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License.
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/docs/licenses/python/pip.txt b/docs/licenses/python/pip.txt deleted file mode 100644 index 6853293422..0000000000 --- a/docs/licenses/python/pip.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/pypa/pip/LICENSE.txt - -Copyright (c) 2008-present The pip developers (see AUTHORS.txt file) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/ply.txt b/docs/licenses/python/ply.txt deleted file mode 100644 index 3918f6100d..0000000000 --- a/docs/licenses/python/ply.txt +++ /dev/null @@ -1,5 +0,0 @@ -http://www.dabeaz.com/ply/ - -Copyright -PLY-3.2 and newer releases are distributed under a BSD-license. Older versions are licensed under the terms of the Lesser GPL (LGPL). - diff --git a/docs/licenses/python/posix-ipc.txt b/docs/licenses/python/posix-ipc.txt deleted file mode 100644 index c8b37f5d50..0000000000 --- a/docs/licenses/python/posix-ipc.txt +++ /dev/null @@ -1,26 +0,0 @@ -https://github.com/osvenskan/posix_ipc/LICENSE - -Copyright (c) 2022, Philip Semanchuk -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of posix_ipc nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY ITS CONTRIBUTORS ''AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL Philip Semanchuk BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/docs/licenses/python/proto-plus.txt b/docs/licenses/python/proto-plus.txt deleted file mode 100644 index 8755edc64f..0000000000 --- a/docs/licenses/python/proto-plus.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/googleapis/proto-plus-python/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. 
Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/protobuf.txt b/docs/licenses/python/protobuf.txt deleted file mode 100644 index 41f14095b5..0000000000 --- a/docs/licenses/python/protobuf.txt +++ /dev/null @@ -1,4 +0,0 @@ -https://pypi.org/project/protobuf/ - -License: 3-Clause BSD License - diff --git a/docs/licenses/python/psutil.txt b/docs/licenses/python/psutil.txt deleted file mode 100644 index c8b10cbacd..0000000000 --- a/docs/licenses/python/psutil.txt +++ /dev/null @@ -1,31 +0,0 @@ -https://github.com/giampaolo/psutil/LICENSE - -BSD 3-Clause License - -Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name of the psutil authors nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/py-partiql-parser.txt b/docs/licenses/python/py-partiql-parser.txt deleted file mode 100644 index 6582dba73e..0000000000 --- a/docs/licenses/python/py-partiql-parser.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/getmoto/py-partiql-parser/LICENSE - -MIT License - -Copyright (c) 2022 Bert Blommers - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/pyasn1-modules.txt b/docs/licenses/python/pyasn1-modules.txt deleted file mode 100644 index 4e8cb25fcb..0000000000 --- a/docs/licenses/python/pyasn1-modules.txt +++ /dev/null @@ -1,27 +0,0 @@ -https://github.com/etingof/pyasn1-modules/LICENSE.txt - -Copyright (c) 2005-2020, Ilya Etingof <etingof@gmail.com> -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - diff --git a/docs/licenses/python/pyasn1.txt b/docs/licenses/python/pyasn1.txt deleted file mode 100644 index f4c14b485d..0000000000 --- a/docs/licenses/python/pyasn1.txt +++ /dev/null @@ -1,26 +0,0 @@ -https://github.com/pyasn1/pyasn1/LICENSE.rst - -Copyright (c) 2005-2020, Ilya Etingof <etingof@gmail.com> -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/pyasn1_modules.txt b/docs/licenses/python/pyasn1_modules.txt deleted file mode 100644 index a4de67181f..0000000000 --- a/docs/licenses/python/pyasn1_modules.txt +++ /dev/null @@ -1,26 +0,0 @@ -https://github.com/pyasn1/pyasn1-modules/LICENSE.txt - -Copyright (c) 2005-2020, Ilya Etingof <etingof@gmail.com> -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/pycodestyle.txt b/docs/licenses/python/pycodestyle.txt deleted file mode 100644 index 1d7ac30e60..0000000000 --- a/docs/licenses/python/pycodestyle.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://pycodestyle.pycqa.org/en/latest/ - -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- diff --git a/docs/licenses/python/pycparser.txt b/docs/licenses/python/pycparser.txt deleted file mode 100644 index da275799eb..0000000000 --- a/docs/licenses/python/pycparser.txt +++ /dev/null @@ -1,29 +0,0 @@ -https://github.com/eliben/pycparser/LICENSE - -pycparser -- A C parser in Python - -Copyright (c) 2008-2022, Eli Bendersky <eliben@gmail.com> -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -* Neither the name of the copyright holder nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/pyflakes.txt b/docs/licenses/python/pyflakes.txt deleted file mode 100644 index 376c0aac9d..0000000000 --- a/docs/licenses/python/pyflakes.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/PyCQA/pyflakes/LICENSE - -Copyright 2005-2011 Divmod, Inc. -Copyright 2013-2014 Florent Xicluna - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/pygithub.txt b/docs/licenses/python/pygithub.txt deleted file mode 100644 index 6bf3815f7a..0000000000 --- a/docs/licenses/python/pygithub.txt +++ /dev/null @@ -1,676 +0,0 @@ -https://github.com/pygithub/pygithub/COPYING - - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> 
- Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. 
- - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. 
For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. 
You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. 
- - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. 
- - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. 
- - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". 
- - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<https://www.gnu.org/licenses/why-not-lgpl.html>. diff --git a/docs/licenses/python/pyjwt.txt b/docs/licenses/python/pyjwt.txt deleted file mode 100644 index 12a9437d3c..0000000000 --- a/docs/licenses/python/pyjwt.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/jpadilla/pyjwt/LICENSE - -The MIT License (MIT) - -Copyright (c) 2015-2022 José Padilla - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/pynacl.txt b/docs/licenses/python/pynacl.txt deleted file mode 100644 index 0ddca8f198..0000000000 --- a/docs/licenses/python/pynacl.txt +++ /dev/null @@ -1,176 +0,0 @@ -https://github.com/pyca/pynacl/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. diff --git a/docs/licenses/python/pyopenssl.txt b/docs/licenses/python/pyopenssl.txt deleted file mode 100644 index bea9ae7aca..0000000000 --- a/docs/licenses/python/pyopenssl.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/pyca/pyopenssl/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/pyparsing.txt b/docs/licenses/python/pyparsing.txt deleted file mode 100644 index 53c3454e1a..0000000000 --- a/docs/licenses/python/pyparsing.txt +++ /dev/null @@ -1,20 +0,0 @@ -https://github.com/pyparsing/pyparsing/LICENSE - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/pyrsistent.txt b/docs/licenses/python/pyrsistent.txt deleted file mode 100644 index 7d0a59c1fa..0000000000 --- a/docs/licenses/python/pyrsistent.txt +++ /dev/null @@ -1,24 +0,0 @@ -https://github.com/tobgu/pyrsistent/LICENSE.mit - -Copyright (c) 2023 Tobias Gustafsson - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/docs/licenses/python/python-dateutil.txt b/docs/licenses/python/python-dateutil.txt deleted file mode 100644 index 65426364dd..0000000000 --- a/docs/licenses/python/python-dateutil.txt +++ /dev/null @@ -1,56 +0,0 @@ -https://github.com/dateutil/dateutil/LICENSE - -Copyright 2017- Paul Ganssle -Copyright 2017- dateutil contributors (see AUTHORS file) - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -The above license applies to all contributions after 2017-12-01, as well as -all contributions that have been re-licensed (see AUTHORS file for the list of -contributors who have re-licensed their code). --------------------------------------------------------------------------------- -dateutil - Extensions to the standard Python datetime module. - -Copyright (c) 2003-2011 - Gustavo Niemeyer -Copyright (c) 2012-2014 - Tomi Pieviläinen -Copyright (c) 2014-2016 - Yaron de Leeuw -Copyright (c) 2015- - Paul Ganssle -Copyright (c) 2015- - dateutil contributors (see AUTHORS file) - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -The above BSD License Applies to all code, even that also covered by Apache 2.0. \ No newline at end of file diff --git a/docs/licenses/python/python-dxf.txt b/docs/licenses/python/python-dxf.txt deleted file mode 100644 index a411e223bc..0000000000 --- a/docs/licenses/python/python-dxf.txt +++ /dev/null @@ -1,21 +0,0 @@ -https://github.com/davedoesdev/dxf/LICENCE - -Copyright (c) 2015 David Halls - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is furnished -to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/python/python-editor.txt b/docs/licenses/python/python-editor.txt deleted file mode 100644 index 0ac0c0f236..0000000000 --- a/docs/licenses/python/python-editor.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/fmoo/python-editor/LICENSE - -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- diff --git a/docs/licenses/python/python-gitlab.txt b/docs/licenses/python/python-gitlab.txt deleted file mode 100644 index 85d4e7408a..0000000000 --- a/docs/licenses/python/python-gitlab.txt +++ /dev/null @@ -1,167 +0,0 @@ -https://github.com/python-gitlab/python-gitlab/COPYING - - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. 
You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. 
Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. diff --git a/docs/licenses/python/pytz.txt b/docs/licenses/python/pytz.txt deleted file mode 100644 index 93e86cf308..0000000000 --- a/docs/licenses/python/pytz.txt +++ /dev/null @@ -1,10 +0,0 @@ -https://pythonhosted.org/pytz/#license - -License - -MIT license. - -This code is also available as part of Zope 3 under the Zope Public License, Version 2.1 (ZPL). - -I’m happy to relicense this code if necessary for inclusion in other open source projects. - diff --git a/docs/licenses/python/pyyaml.txt b/docs/licenses/python/pyyaml.txt deleted file mode 100644 index 76913ffb23..0000000000 --- a/docs/licenses/python/pyyaml.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/yaml/pyyaml/LICENSE - -Copyright (c) 2017-2021 Ingy döt Net -Copyright (c) 2006-2016 Kirill Simonov - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/pyzmq.txt b/docs/licenses/python/pyzmq.txt deleted file mode 100644 index 53dfb7231e..0000000000 --- a/docs/licenses/python/pyzmq.txt +++ /dev/null @@ -1,32 +0,0 @@ -https://github.com/zeromq/pyzmq/LICENSE.md - -BSD 3-Clause License - -Copyright (c) 2009-2012, Brian Granger, Min Ragan-Kelley - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. 
Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/readchar.txt b/docs/licenses/python/readchar.txt deleted file mode 100644 index adaf8f5450..0000000000 --- a/docs/licenses/python/readchar.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/magmax/python-readchar/LICENCE - -MIT Licence - -Copyright (c) 2022 Miguel Angel Garcia - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicence, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/requests-toolbelt.txt b/docs/licenses/python/requests-toolbelt.txt deleted file mode 100644 index ad4d3cccb4..0000000000 --- a/docs/licenses/python/requests-toolbelt.txt +++ /dev/null @@ -1,15 +0,0 @@ -https://github.com/requests/toolbelt/LICENSE - -Copyright 2014 Ian Cordasco, Cory Benfield - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/docs/licenses/python/requests.txt b/docs/licenses/python/requests.txt deleted file mode 100644 index f6f8b7e41a..0000000000 --- a/docs/licenses/python/requests.txt +++ /dev/null @@ -1,177 +0,0 @@ -https://github.com/psf/requests/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. -
- 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. -
- 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. -
- 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. -
- 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. diff --git a/docs/licenses/python/requirements-parser.txt b/docs/licenses/python/requirements-parser.txt deleted file mode 100644 index a552092b5e..0000000000 --- a/docs/licenses/python/requirements-parser.txt +++ /dev/null @@ -1,2478 +0,0 @@ -https://github.com/madpah/requirements-parser/#readme/LICENSE
Requirements Parser

-

This is a small Python module for parsing Pip requirement files.

-

The goal is to parse everything in the Pip requirement file format spec.

-

Installation

-
pip install requirements-parser
-
-

or

-
poetry add requirements-parser
-
-

Examples

-

requirements-parser can parse a file-like object or a text string.

-
>>> import requirements
->>> with open('requirements.txt', 'r') as fd:
-...     for req in requirements.parse(fd):
-...         print(req.name, req.specs)
-Django [('>=', '1.11'), ('<', '1.12')]
-six [('==', '1.10.0')]
-
-
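Parsing from a text string works the same way (a minimal sketch; the requirement lines here are illustrative):
-
->>> import requirements
->>> for req in requirements.parse('Django>=1.11,<1.12\nsix==1.10.0\n'):
-...     print(req.name, req.specs)
-Django [('>=', '1.11'), ('<', '1.12')]
-six [('==', '1.10.0')]
-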

It can handle most (if not all) of the options in requirement files that do not involve traversing the local filesystem. These include (a short parsing sketch follows the list):

-
  • editables (-e git+https://github.com/toastdriven/pyelasticsearch.git)
  • version control URIs
  • egg hashes and subdirectories (#egg=django-haystack&subdirectory=setup)
  • extras (DocParser[PDF])
  • URLs
-
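For instance, a single editable VCS line exposes several of these options as attributes on the parsed requirement (a minimal sketch; attribute names as documented by requirements-parser):
-
->>> import requirements
->>> line = '-e git+https://github.com/toastdriven/pyelasticsearch.git#egg=pyelasticsearch'
->>> req = next(requirements.parse(line))
->>> (req.editable, req.vcs, req.name)
-(True, 'git', 'pyelasticsearch')
-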

Documentation

-

View the documentation here.

-

Python Support

-

We endeavour to support all functionality for all currently supported Python versions. -However, some features may be unavailable on older Python versions that lack the required support.

-

Changelog

-

See our CHANGELOG.

-

Contributing

-

Feel free to open issues, bug reports or pull requests.
-See the CONTRIBUTING file for details.

-

Copyright & License

-

requirements-parser was originally written by @davidfischer and is now maintained by @madpah. See Authors for full details.

-

Permission to modify and redistribute is granted under the terms of the Apache 2.0 license.

-

See the LICENSE file for the full license.

- - - diff --git a/docs/licenses/python/responses.txt b/docs/licenses/python/responses.txt deleted file mode 100644 index d22758b3a2..0000000000 --- a/docs/licenses/python/responses.txt +++ /dev/null @@ -1,203 +0,0 @@ -https://github.com/getsentry/responses/LICENSE - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright 2015 David Cramer - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/docs/licenses/python/roundrobin.txt b/docs/licenses/python/roundrobin.txt deleted file mode 100644 index 0be18c8728..0000000000 --- a/docs/licenses/python/roundrobin.txt +++ /dev/null @@ -1,24 +0,0 @@ -https://github.com/linnik/roundrobin/LICENSE - -MIT License - -Copyright (c) 2020 Vyacheslav Linnik - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/docs/licenses/python/rsa.txt b/docs/licenses/python/rsa.txt deleted file mode 100644 index bc96bf6d6c..0000000000 --- a/docs/licenses/python/rsa.txt +++ /dev/null @@ -1,15 +0,0 @@ -https://github.com/sybrenstuvel/python-rsa/LICENSE - -Copyright 2011 Sybren A. Stüvel - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/docs/licenses/python/s3transfer.txt b/docs/licenses/python/s3transfer.txt deleted file mode 100644 index 3699777117..0000000000 --- a/docs/licenses/python/s3transfer.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/boto/s3transfer/LICENSE.txt - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/docs/licenses/python/setuptools-scm.txt b/docs/licenses/python/setuptools-scm.txt deleted file mode 100644 index 60a942824f..0000000000 --- a/docs/licenses/python/setuptools-scm.txt +++ /dev/null @@ -1,19 +0,0 @@ -https://github.com/pypa/setuptools_scm/LICENSE - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/docs/licenses/python/setuptools.txt b/docs/licenses/python/setuptools.txt deleted file mode 100644 index a495bf4b24..0000000000 --- a/docs/licenses/python/setuptools.txt +++ /dev/null @@ -1,19 +0,0 @@ -https://github.com/pypa/setuptools/LICENSE - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to -deal in the Software without restriction, including without limitation the -rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -IN THE SOFTWARE. diff --git a/docs/licenses/python/six.txt b/docs/licenses/python/six.txt deleted file mode 100644 index 912ef5422a..0000000000 --- a/docs/licenses/python/six.txt +++ /dev/null @@ -1,20 +0,0 @@ -https://github.com/benjaminp/six/LICENSE - -Copyright (c) 2010-2020 Benjamin Peterson - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/smmap.txt b/docs/licenses/python/smmap.txt deleted file mode 100644 index 47f7e4814a..0000000000 --- a/docs/licenses/python/smmap.txt +++ /dev/null @@ -1,32 +0,0 @@ -https://github.com/gitpython-developers/smmap/LICENSE - -Copyright (C) 2010, 2011 Sebastian Thiel and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -* Neither the name of the async project nor the names of -its contributors may be used to endorse or promote products derived -from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/docs/licenses/python/strict-rfc3339.txt b/docs/licenses/python/strict-rfc3339.txt deleted file mode 100644 index bd1ad32dd5..0000000000 --- a/docs/licenses/python/strict-rfc3339.txt +++ /dev/null @@ -1,4 +0,0 @@ -https://pypi.org/project/strict-rfc3339/ - -License: GNU General Public License v3 (GPLv3) (GNU General Public License Version 3) - diff --git a/docs/licenses/python/tinyquery.txt b/docs/licenses/python/tinyquery.txt deleted file mode 100644 index a29e2080bc..0000000000 --- a/docs/licenses/python/tinyquery.txt +++ /dev/null @@ -1,24 +0,0 @@ -https://github.com/Khan/tinyquery/LICENSE - -The MIT License (MIT) - -Copyright (c) 2014 Khan Academy - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/docs/licenses/python/toml.txt b/docs/licenses/python/toml.txt deleted file mode 100644 index 1f73df9e6c..0000000000 --- a/docs/licenses/python/toml.txt +++ /dev/null @@ -1,29 +0,0 @@ -https://github.com/uiri/toml/LICENSE -
-The MIT License -
-Copyright 2013-2019 William Pearson -Copyright 2015-2016 Julien Enselme -Copyright 2016 Google Inc. -Copyright 2017 Samuel Vasko -Copyright 2017 Nate Prewitt -Copyright 2017 Jack Evans -Copyright 2019 Filippo Broggini -
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -
-The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/docs/licenses/python/tqdm.txt b/docs/licenses/python/tqdm.txt deleted file mode 100644 index dc07d7dd3e..0000000000 --- a/docs/licenses/python/tqdm.txt +++ /dev/null @@ -1,2043 +0,0 @@ -https://github.com/tqdm/tqdm/wiki/LICENSE
Home


Welcome to the tqdm wiki!

-

-

The wiki is still in its infancy. You may nevertheless find interesting -additional information by browsing the pages on the right.

-

Feel free to clone and contribute to this wiki, and open issues or -pull requests on the main repository!

-

Here, have some impressive background:

-

stats

- -

blogs

- -

publications

-
  • da Costa-Luis, Casper (2019). "tqdm: A Fast, Extensible Progress Meter for Python and CLI," Journal of Open Source Software. 4(37), 1277. doi:10.21105/joss.01277
  • Miller, Preston; Bryce, Chapin (2017). Python Digital Forensics Cookbook: Effective Python recipes for digital investigations. Packt Publishing Ltd. ISBN 9781783987474.
  • Van Boxel, Dan (2017). Hands-On Deep Learning with TensorFlow. Packt Publishing. ISBN 9781787125827.
  • Nandy, Abhishek; Biswas, Manisha (2018). "Reinforcement Learning with Keras, TensorFlow, and ChainerRL". Reinforcement Learning: With Open AI, TensorFlow and Keras Using Python. Apress. pp. 129–153. doi:10.1007/978-1-4842-3285-9_5. ISBN 9781484232859.
  • Stein, Helge S.; Guevarra, Dan; Newhouse, Paul F.; Soedarmadji, Edwin; Gregoire, John M. (2019). "Machine learning of optical properties of materials – predicting spectra from images and images from spectra". Chemical Science. 10 (1): 47–55. doi:10.1039/C8SC03077D.
  • Cook, Neil J.; Scholz, Aleks; Jayawardhana, Ray (28 November 2017). "Very Low-mass Stars and Brown Dwarfs in Upper Scorpius Using Gaia DR1: Mass Function, Disks, and Kinematics". The Astronomical Journal. 154 (6): 256. arXiv:1710.11625. Bibcode:2017AJ....154..256C. doi:10.3847/1538-3881/aa9751.
  • Madhikar, Pranav; Åström, Jan; Westerholm, Jan; Karttunen, Mikko (November 2018). "CellSim3D: GPU accelerated software for simulations of cellular growth and division in three dimensions". Computer Physics Communications. 232: 206–213. Bibcode:2018CoPhC.232..206M. doi:10.1016/j.cpc.2018.05.024.
  • Palmer, Geraint I.; Knight, Vincent A.; Harper, Paul R.; Hawa, Asyl L. (20 May 2018). "Ciw: An open-source discrete event simulation library". Journal of Simulation: 1–15. doi:10.1080/17477778.2018.1473909.
  • Knight, Vincent et al. (31 August 2016). "An open reproducible framework for the study of the iterated prisoner's dilemma". Journal of Open Research Software. 4. doi:10.5334/jors.125. ISSN 2049-9647.

... and many, many more. See e.g. https://www.oreilly.com/search/?query=tqdm.

-

misc

- - - diff --git a/docs/licenses/python/types-awscrt.txt b/docs/licenses/python/types-awscrt.txt deleted file mode 100644 index 9cc34b2078..0000000000 --- a/docs/licenses/python/types-awscrt.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/types-awscrt/LICENSE - -MIT License - -Copyright (c) 2022 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/types-s3transfer.txt b/docs/licenses/python/types-s3transfer.txt deleted file mode 100644 index 9ea4f60f86..0000000000 --- a/docs/licenses/python/types-s3transfer.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/youtype/types-s3transfer/LICENSE - -MIT License - -Copyright (c) 2022 Vlad Emelianov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/types-toml.txt b/docs/licenses/python/types-toml.txt deleted file mode 100644 index 2d4638afd9..0000000000 --- a/docs/licenses/python/types-toml.txt +++ /dev/null @@ -1,239 +0,0 @@ -https://github.com/python/typeshed/LICENSE - -The "typeshed" project is licensed under the terms of the Apache license, as -reproduced below. - -= = = = = - -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- -= = = = = - -Parts of typeshed are licensed under different licenses (like the MIT -license), reproduced below. - -= = = = = - -The MIT License - -Copyright (c) 2015 Jukka Lehtosalo and contributors - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - -= = = = = diff --git a/docs/licenses/python/typing_extensions.txt b/docs/licenses/python/typing_extensions.txt deleted file mode 100644 index 2e58e5448f..0000000000 --- a/docs/licenses/python/typing_extensions.txt +++ /dev/null @@ -1,281 +0,0 @@ -https://github.com/python/typing_extensions/LICENSE - -A. HISTORY OF THE SOFTWARE -========================== - -Python was created in the early 1990s by Guido van Rossum at Stichting -Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands -as a successor of a language called ABC. Guido remains Python's -principal author, although it includes many contributions from others. - -In 1995, Guido continued his work on Python at the Corporation for -National Research Initiatives (CNRI, see https://www.cnri.reston.va.us) -in Reston, Virginia where he released several versions of the -software. - -In May 2000, Guido and the Python core development team moved to -BeOpen.com to form the BeOpen PythonLabs team. In October of the same -year, the PythonLabs team moved to Digital Creations, which became -Zope Corporation. In 2001, the Python Software Foundation (PSF, see -https://www.python.org/psf/) was formed, a non-profit organization -created specifically to own Python-related Intellectual Property. -Zope Corporation was a sponsoring member of the PSF. - -All Python releases are Open Source (see https://opensource.org for -the Open Source Definition). Historically, most, but not all, Python -releases have also been GPL-compatible; the table below summarizes -the various releases. - - Release Derived Year Owner GPL- - from compatible? (1) - - 0.9.0 thru 1.2 1991-1995 CWI yes - 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes - 1.6 1.5.2 2000 CNRI no - 2.0 1.6 2000 BeOpen.com no - 1.6.1 1.6 2001 CNRI yes (2) - 2.1 2.0+1.6.1 2001 PSF no - 2.0.1 2.0+1.6.1 2001 PSF yes - 2.1.1 2.1+2.0.1 2001 PSF yes - 2.1.2 2.1.1 2002 PSF yes - 2.1.3 2.1.2 2002 PSF yes - 2.2 and above 2.1.1 2001-now PSF yes - -Footnotes: - -(1) GPL-compatible doesn't mean that we're distributing Python under - the GPL. All Python licenses, unlike the GPL, let you distribute - a modified version without making your changes open source. The - GPL-compatible licenses make it possible to combine Python with - other software that is released under the GPL; the others don't. 
- -(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, - because its license has a choice of law clause. According to - CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 - is "not incompatible" with the GPL. - -Thanks to the many outside volunteers who have worked under Guido's -direction to make these releases possible. - - -B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON -=============================================================== - -Python software and documentation are licensed under the -Python Software Foundation License Version 2. - -Starting with Python 3.8.6, examples, recipes, and other code in -the documentation are dual licensed under the PSF License Version 2 -and the Zero-Clause BSD license. - -Some software incorporated into Python is under different licenses. -The licenses are listed with code falling under that license. - - -PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 --------------------------------------------- - -1. This LICENSE AGREEMENT is between the Python Software Foundation -("PSF"), and the Individual or Organization ("Licensee") accessing and -otherwise using this software ("Python") in source or binary form and -its associated documentation. - -2. Subject to the terms and conditions of this License Agreement, PSF hereby -grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, -analyze, test, perform and/or display publicly, prepare derivative works, -distribute, and otherwise use Python alone or in any derivative version, -provided, however, that PSF's License Agreement and PSF's notice of copyright, -i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; -All Rights Reserved" are retained in Python alone or in any derivative version -prepared by Licensee. - -3. In the event Licensee prepares a derivative work that is based on -or incorporates Python or any part thereof, and wants to make -the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python. - -4. PSF is making Python available to Licensee on an "AS IS" -basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, -OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between PSF and -Licensee. This License Agreement does not grant permission to use PSF -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using Python, Licensee -agrees to be bound by the terms and conditions of this License -Agreement. 
- - -BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 -------------------------------------------- - -BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 - -1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an -office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the -Individual or Organization ("Licensee") accessing and otherwise using -this software in source or binary form and its associated -documentation ("the Software"). - -2. Subject to the terms and conditions of this BeOpen Python License -Agreement, BeOpen hereby grants Licensee a non-exclusive, -royalty-free, world-wide license to reproduce, analyze, test, perform -and/or display publicly, prepare derivative works, distribute, and -otherwise use the Software alone or in any derivative version, -provided, however, that the BeOpen Python License is retained in the -Software, alone or in any derivative version prepared by Licensee. - -3. BeOpen is making the Software available to Licensee on an "AS IS" -basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE -SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS -AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY -DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -5. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -6. This License Agreement shall be governed by and interpreted in all -respects by the law of the State of California, excluding conflict of -law provisions. Nothing in this License Agreement shall be deemed to -create any relationship of agency, partnership, or joint venture -between BeOpen and Licensee. This License Agreement does not grant -permission to use BeOpen trademarks or trade names in a trademark -sense to endorse or promote products or services of Licensee, or any -third party. As an exception, the "BeOpen Python" logos available at -http://www.pythonlabs.com/logos.html may be used according to the -permissions granted on that web page. - -7. By copying, installing or otherwise using the software, Licensee -agrees to be bound by the terms and conditions of this License -Agreement. - - -CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 ---------------------------------------- - -1. This LICENSE AGREEMENT is between the Corporation for National -Research Initiatives, having an office at 1895 Preston White Drive, -Reston, VA 20191 ("CNRI"), and the Individual or Organization -("Licensee") accessing and otherwise using Python 1.6.1 software in -source or binary form and its associated documentation. - -2. Subject to the terms and conditions of this License Agreement, CNRI -hereby grants Licensee a nonexclusive, royalty-free, world-wide -license to reproduce, analyze, test, perform and/or display publicly, -prepare derivative works, distribute, and otherwise use Python 1.6.1 -alone or in any derivative version, provided, however, that CNRI's -License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) -1995-2001 Corporation for National Research Initiatives; All Rights -Reserved" are retained in Python 1.6.1 alone or in any derivative -version prepared by Licensee. 
Alternately, in lieu of CNRI's License -Agreement, Licensee may substitute the following text (omitting the -quotes): "Python 1.6.1 is made available subject to the terms and -conditions in CNRI's License Agreement. This Agreement together with -Python 1.6.1 may be located on the internet using the following -unique, persistent identifier (known as a handle): 1895.22/1013. This -Agreement may also be obtained from a proxy server on the internet -using the following URL: http://hdl.handle.net/1895.22/1013". - -3. In the event Licensee prepares a derivative work that is based on -or incorporates Python 1.6.1 or any part thereof, and wants to make -the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python 1.6.1. - -4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" -basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, -OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. This License Agreement shall be governed by the federal -intellectual property law of the United States, including without -limitation the federal copyright law, and, to the extent such -U.S. federal law does not apply, by the law of the Commonwealth of -Virginia, excluding Virginia's conflict of law provisions. -Notwithstanding the foregoing, with regard to derivative works based -on Python 1.6.1 that incorporate non-separable material that was -previously distributed under the GNU General Public License (GPL), the -law of the Commonwealth of Virginia shall govern this License -Agreement only as to issues arising under or with respect to -Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this -License Agreement shall be deemed to create any relationship of -agency, partnership, or joint venture between CNRI and Licensee. This -License Agreement does not grant permission to use CNRI trademarks or -trade name in a trademark sense to endorse or promote products or -services of Licensee, or any third party. - -8. By clicking on the "ACCEPT" button where indicated, or by copying, -installing or otherwise using Python 1.6.1, Licensee agrees to be -bound by the terms and conditions of this License Agreement. - - ACCEPT - - -CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 --------------------------------------------------- - -Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, -The Netherlands. All rights reserved. 
- -Permission to use, copy, modify, and distribute this software and its -documentation for any purpose and without fee is hereby granted, -provided that the above copyright notice appear in all copies and that -both that copyright notice and this permission notice appear in -supporting documentation, and that the name of Stichting Mathematisch -Centrum or CWI not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO -THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE -FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON DOCUMENTATION ----------------------------------------------------------------------- - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM -LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR -OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. diff --git a/docs/licenses/python/tzlocal.txt b/docs/licenses/python/tzlocal.txt deleted file mode 100644 index b5163f9910..0000000000 --- a/docs/licenses/python/tzlocal.txt +++ /dev/null @@ -1,21 +0,0 @@ -https://github.com/regebro/tzlocal/LICENSE.txt - -Copyright 2011-2017 Lennart Regebro - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/uritemplate.txt b/docs/licenses/python/uritemplate.txt deleted file mode 100644 index 086dbaafc9..0000000000 --- a/docs/licenses/python/uritemplate.txt +++ /dev/null @@ -1,6 +0,0 @@ -https://github.com/python-hyper/uritemplate/blob/main/LICENSE - -This software is made available under the terms of *either* of the licenses -found in LICENSE.APACHE or LICENSE.BSD. Contributions to uritemplate are -made under the terms of *both* these licenses. 
- diff --git a/docs/licenses/python/urllib3.txt b/docs/licenses/python/urllib3.txt deleted file mode 100644 index cd2d803a61..0000000000 --- a/docs/licenses/python/urllib3.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/urllib3/urllib3/LICENSE.txt - -MIT License - -Copyright (c) 2008-2020 Andrey Petrov and contributors. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/docs/licenses/python/watchdog.txt b/docs/licenses/python/watchdog.txt deleted file mode 100644 index 298afaa151..0000000000 --- a/docs/licenses/python/watchdog.txt +++ /dev/null @@ -1,204 +0,0 @@ -https://github.com/gorakhargosh/watchdog/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/docs/licenses/python/wcwidth.txt b/docs/licenses/python/wcwidth.txt deleted file mode 100644 index 739e5e0896..0000000000 --- a/docs/licenses/python/wcwidth.txt +++ /dev/null @@ -1,29 +0,0 @@ -https://github.com/jquast/wcwidth/LICENSE - -The MIT License (MIT) - -Copyright (c) 2014 Jeff Quast - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -Markus Kuhn -- 2007-05-26 (Unicode 5.0) - -Permission to use, copy, modify, and distribute this software -for any purpose and without fee is hereby granted. The author -disclaims all warranties with regard to this software. diff --git a/docs/licenses/python/websocket-client.txt b/docs/licenses/python/websocket-client.txt deleted file mode 100644 index 426e703149..0000000000 --- a/docs/licenses/python/websocket-client.txt +++ /dev/null @@ -1,205 +0,0 @@ -https://github.com/websocket-client/websocket-client/LICENSE - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2024 engn33r - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/docs/licenses/python/werkzeug.txt b/docs/licenses/python/werkzeug.txt deleted file mode 100644 index 72f54c1c5a..0000000000 --- a/docs/licenses/python/werkzeug.txt +++ /dev/null @@ -1,30 +0,0 @@ -https://github.com/pallets/werkzeug/LICENSE.txt - -Copyright 2007 Pallets - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/docs/licenses/python/wheel.txt b/docs/licenses/python/wheel.txt deleted file mode 100644 index 0fd4094a19..0000000000 --- a/docs/licenses/python/wheel.txt +++ /dev/null @@ -1,23 +0,0 @@ -https://github.com/pypa/wheel/LICENSE.txt - -MIT License - -Copyright (c) 2012 Daniel Holth and contributors - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/wrapt.txt b/docs/licenses/python/wrapt.txt deleted file mode 100644 index 8fcf79357c..0000000000 --- a/docs/licenses/python/wrapt.txt +++ /dev/null @@ -1,26 +0,0 @@ -https://github.com/GrahamDumpleton/wrapt/LICENSE - -Copyright (c) 2013-2023, Graham Dumpleton -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/www-authenticate.txt b/docs/licenses/python/www-authenticate.txt deleted file mode 100644 index afe50c19bc..0000000000 --- a/docs/licenses/python/www-authenticate.txt +++ /dev/null @@ -1,16 +0,0 @@ -https://github.com/alexsdutton/www-authenticate/LICENSE - -Copyright (c) 2015 Alexander Dutton. -All rights reserved. 
- -Redistribution and use in source and binary forms are permitted -provided that the above copyright notice and this paragraph are -duplicated in all such forms and that any documentation, -advertising materials, and other materials related to such -distribution and use acknowledge that the software was developed -by the Alexander Dutton. The name of the Alexander Dutton may not -be used to endorse or promote products derived from this software -without specific prior written permission. -THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. diff --git a/docs/licenses/python/xmltodict.txt b/docs/licenses/python/xmltodict.txt deleted file mode 100644 index 81194ea465..0000000000 --- a/docs/licenses/python/xmltodict.txt +++ /dev/null @@ -1,9 +0,0 @@ -https://github.com/martinblech/xmltodict/LICENSE - -Copyright (C) 2012 Martin Blech and individual contributors. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/docs/licenses/python/zipp.txt b/docs/licenses/python/zipp.txt deleted file mode 100644 index 626dc62a17..0000000000 --- a/docs/licenses/python/zipp.txt +++ /dev/null @@ -1,22 +0,0 @@ -https://github.com/jaraco/zipp/LICENSE - -Copyright Jason R. Coombs - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to -deal in the Software without restriction, including without limitation the -rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -IN THE SOFTWARE. 
- diff --git a/docs/licenses/python/zope.event.txt b/docs/licenses/python/zope.event.txt deleted file mode 100644 index 67f3a92117..0000000000 --- a/docs/licenses/python/zope.event.txt +++ /dev/null @@ -1,46 +0,0 @@ -https://github.com/zopefoundation/zope.event/LICENSE.txt - -Zope Public License (ZPL) Version 2.1 - -A copyright notice accompanies this license document that identifies the -copyright holders. - -This license has been certified as open source. It has also been designated as -GPL compatible by the Free Software Foundation (FSF). - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions in source code must retain the accompanying copyright -notice, this list of conditions, and the following disclaimer. - -2. Redistributions in binary form must reproduce the accompanying copyright -notice, this list of conditions, and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Names of the copyright holders must not be used to endorse or promote -products derived from this software without prior written permission from the -copyright holders. - -4. The right to distribute this software or to use it for any purpose does not -give you the right to use Servicemarks (sm) or Trademarks (tm) of the -copyright -holders. Use of them is covered by separate agreement with the copyright -holders. - -5. If any files are modified, you must cause the modified files to carry -prominent notices stating that you changed the files and the date of any -change. - -Disclaimer - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY EXPRESSED -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/python/zope.interface.txt b/docs/licenses/python/zope.interface.txt deleted file mode 100644 index a248c2cc64..0000000000 --- a/docs/licenses/python/zope.interface.txt +++ /dev/null @@ -1,46 +0,0 @@ -https://github.com/zopefoundation/zope.interface/LICENSE.txt - -Zope Public License (ZPL) Version 2.1 - -A copyright notice accompanies this license document that identifies the -copyright holders. - -This license has been certified as open source. It has also been designated as -GPL compatible by the Free Software Foundation (FSF). - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions in source code must retain the accompanying copyright -notice, this list of conditions, and the following disclaimer. - -2. Redistributions in binary form must reproduce the accompanying copyright -notice, this list of conditions, and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. 
Names of the copyright holders must not be used to endorse or promote -products derived from this software without prior written permission from the -copyright holders. - -4. The right to distribute this software or to use it for any purpose does not -give you the right to use Servicemarks (sm) or Trademarks (tm) of the -copyright -holders. Use of them is covered by separate agreement with the copyright -holders. - -5. If any files are modified, you must cause the modified files to carry -prominent notices stating that you changed the files and the date of any -change. - -Disclaimer - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY EXPRESSED -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/licenses/smartgit.txt b/docs/licenses/smartgit.txt deleted file mode 100644 index ee019c2a65..0000000000 --- a/docs/licenses/smartgit.txt +++ /dev/null @@ -1,147 +0,0 @@ -https://www.syntevo.com/documents/smartgit-license.html - -License Agreement for SmartGit - -Last date of change: 2022-07-26 - -1 Subject of the Contract: The license terms of syntevo GmbH (hereinafter called "licensor") are applied for the concession of the rights of use for the entire or partly use of the object code of the software SmartGit (hereinafter called "SOFTWARE") to contractors, juristic persons under public law or official fund assets in terms of §310 in conjunction with §14 BGB [Civil Code] (hereinafter called "licensee"). Herewith the inclusion of the licensee's own terms and conditions is contradicted, unless their validity has explicitly been agreed to. - -2 Scope of the Rights of Use - -2.1 The following terms are valid for the assignment and use of the SOFTWARE including any documentation and the license file (a file that is custom-made for each individual granting of a license, the file being necessary for the operation of the SOFTWARE). - -2.2 They are not valid for additional services such as installation, integration, parameterization and customization of the SOFTWARE to the licensee's requirements. - -2.3 If the licensor grants a "lifetime" license, the licensee may use any new SOFTWARE version which has been released by the licensor until the SOFTWARE has reached its end of life. It's at the sole discretion of the licensor to declare the SOFTWARE's end of life. - -3 Rights of Use - -3.1 The SOFTWARE is partly copyrighted in favour of the licensor and partly in favour of other holders of rights. - -3.2 The SOFTWARE is handed over to the licensee for its intended use. The scope of the intended use as well as the kind and scope of the rights of use result from the assignment contract and said license terms. They especially depend on whether the licensee gets a - - - SOFTWARE Non-Commercial License, - - SOFTWARE Commercial License. - -3.2.1 The SOFTWARE Non-Commercial License can only be agreed-upon with a natural person. 
- -3.2.1.1 If a SOFTWARE Non-Commercial License is agreed-upon with the licensee, the licensor grants to licensee the non-exclusive, non-transferable right, which is limited according to the terms of clause 7 and terminated according to the terms of clause 3.2.1.2, to have the SOFTWARE used - - - on any number of machines - - if the licensee - - spends the majority of their work on open-source projects, or - - is an employee or student on a public academic institution, or - - is an employee of a public charitable organizations primarily targeting philanthropy, health research, education or social well-being. - -3.2.1.2 The Non-Commercial License is only valid for the latest version of the SOFTWARE as it is published on licensor's website and for all versions which have been first published not earlier than 6 months prior to the latest published version ('License Transition Period'). The SOFTWARE may contain a feature which enforces the termination of the Non-Commercial License for older versions by making them stop working after the License Transition Period. - -3.2.2 If a SOFTWARE Commercial License is agreed-upon with the licensee, the licensor grants the licensee the worldwide, non-exclusive, non-transferable right to have the SOFTWARE used by the agreed-upon number of users nameable on demand, that means to have the SOFTWARE saved, loaded, displayed and run permanently or temporarily. Each user (person) must be nameable on demand. Each user can be replaced by another user known by name only once in a month. From that time on, said first user must not use the SOFTWARE anymore and said second user may start to use the SOFTWARE. The licensee is responsible to perform suitable measures to ensure that the number of users known by name at no time exceeds the number of the agreed-upon licenses. Each user may use his assigned license on any number of machines, regardless whether the SOFTWARE is installed on a server, used by a terminal server, by a network share, or is installed on a Machine or virtual machine (each use counts). - -3.2.2.1 If the Commercial License has been purchased with a one-time fee, it is granted permanently (for an unlimited period of time), limited by clause 7. - -3.2.2.2 If the Commercial License has been purchased by a subscription, it is only valid for the agreed-upon subscription period, limited by clause 7. The subscription extends automatically by the subscription period. Both, licensee and licensor, can cancel the subscription at any time; the termination will become effective at the end of the subscription period. - -3.3 The licensee undertakes to take care that the intended use of the SOFTWARE is assured by appropriate technical and organizational measures. - -3.4 The licensee is obliged to back up the data orderly and regularly (especially with respect to working copies and repositories). - -3.5 The licensee is entitled to produce one backup from the SOFTWARE. - -3.6 The licensee is not entitled to translate, process, arrange the SOFTWARE differently or adapt or alter it and to copy the achieved results. - -3.7 The licensee is not entitled to distribute the SOFTWARE. This comprises any form of sublicensing, especially selling, letting, leasing or lending. Passing on the license file, a digitally signed file containing name and address of the licensee which allows the technical exploitation of the SOFTWARE, is also considered to be a form of distribution. 
- -3.8 The licensee is not entitled to make the SOFTWARE publicly accessible in such a way that it is accessible to the public at any place and at any time. - -3.9 The licensee undertakes not to decompile the SOFTWARE. - -4 Demo - -4.1 If the SOFTWARE is handed over only for test purposes, the licensor grants the licensee the non-exclusive, non-transferable right, limited to the test period of thirty-one (31) days, to have a non-registered copy of the SOFTWARE (demo) used on a arbitrary number of single-user computers or on a central server or via terminal server clients by a arbitrary number of users simultaneously for test purposes only, that means not productively, which again means to have the SOFTWARE saved, loaded, displayed and run permanently or temporarily. - -4.2 The demo of the SOFTWARE includes a function that terminates the use of the SOFTWARE after the expiration of the test period. After the payment of the agreed-upon license fee and the agreement to said license terms within the agreed-upon scope, this function is deactivated and the licensee gets the agreed-upon rights of use according to these license terms. Bypassing this technical protective function results in an immediate loss of all conceded rights of use. In this case a termination according to clause 5 is no longer applicable. - -5 On-line Functionality - -5.1 The SOFTWARE contains features which will automatically connect to licensor's servers to - - - activate the license, for evaluation, commercial and non-commercial licensing - - check for a new program version - - in case of an internal error, report a "crash footprint" - -The transferred data may contain the user's Git email address and license ID. All other transferred data DON'T contain POTENTIALLY SENSITIVE INFORMATION. - -5.2 Only upon user confirmation the SOFTWARE will send a full "bug report" to licensor for investigation. The contents of a full "bug report" may contain POTENTIALLY SENSITIVE INFORMATION and should be reviewed by the licensee before sending. - -5.3 The licensor treats the information sent by the licensee according to 5.1 and 5.2 as confidential and in accordance with the privacy policy at https://www.syntevo.com/privacy-policy/. - -6 Liability - -6.1 When the SOFTWARE is handed over cost-free or for test purposes, the licensor is only liable in case that malice or gross negligence are blamed on him. - -6.2 The licensee's rights on indemnification due to a certain lack are excluded, if, for example, he has known the lack at the conclusion of the contract, especially when there had been a test phase before that date. If the licensee has not known of a lack due to gross negligence, he can only claim rights with respect to that lack if the licensor had fraudulently concealed the lack or had taken over a guarantee for the quality of that object. - -6.3 In case of data loss the licensor is only liable for the effort that would have been necessary for the recovery of the data, provided that the data backup was properly executed by the licensee. - -6.4 If a third party asserts rights against the licensee because of claimed infringement of the SOFTWARE, the licensee is entitled to the rights defined in the corresponding sales contract. 
In case there is no sales contract or no such stipulation in the sales contract, the licensor is liable as follows:
- 
- - To cure, the licensor can, at its own expense, either change or replace the SOFTWARE in such a way that it does not infringe the property rights, but still essentially corresponds to the stipulated functional and performance characteristics in a manner reasonable for the licensee, or the licensor can indemnify the licensee against claims by the holder of the property rights.
- - If cure is impossible for the licensor or is possible only under disproportionate conditions, the licensor has the right to take back the respective SOFTWARE in exchange for reimbursement of the paid remuneration. In doing so, the licensor must grant the licensee a reasonable phase-out period, unless that is possible only under unreasonable legal or other conditions.
- 
-The licensee's other claims regarding section 6.2 remain unaffected.
- 
-However, this only applies if the licensee immediately notifies the licensor about the claims of the third party, does not accept any claimed infringement, and either leaves any legal actions, including possible out-of-court arrangements, to the licensor or conducts them only in agreement with the licensor.
- 
-7 Termination of the Rights of Use
- 
-7.1 The subscription license according to 3.2.2.2 can be terminated by either party with written notice in accordance with the agreed-upon period.
- 
-7.2 If the licensee culpably and materially violates the agreed-upon rights of use or intellectual property rights of the entitled person, the licensor can cancel the rights to use the respective SOFTWARE. In this case the license fee will not be refunded.
- 
-7.3 The Non-Commercial License terminates automatically if the usage of the SOFTWARE is no longer considered non-commercial according to clause 3.2.1.
- 
-7.4 In case of cancellation the licensee is obliged to destroy the original of the SOFTWARE affected by the cancellation, including any documentation and all copies, or to return them to the licensor. If demanded by the licensor, the licensee will deliver a statement certifying the destruction.
- 
-7.5 The other legal provisions remain unaffected.
- 
-8 Applicable Law and Place of Jurisdiction
- 
-8.1 The law of the Federal Republic of Germany applies to all legal relationships of the parties, excluding the laws on the international sale of goods.
- 
-8.2 The licensor has the right to change this license agreement for new SOFTWARE versions. By using these new SOFTWARE versions, the licensee accepts that the new license agreement supersedes any older one.
- 
-8.3 If the licensee is a merchant, a juristic person under public law or a special fund under public law, the exclusive place of jurisdiction for all disputes from this contract will be the licensor's place of business. The same applies when the licensee does not have a domestic place of general jurisdiction in Germany or the licensor's residence or habitual abode is not known at the time of filing an action. The licensor's right to apply to a court at a different place of jurisdiction remains unaffected.
- 
-9 The following third parties have rights on parts of the SOFTWARE:
- 
- - ANTLR, copyright by Terence Parr and Sam Harwell (http://www.antlr.org). The corresponding license agreement (BSD) can be found at http://www.antlr.org/license.html or in the file licenses/ANTLR-LICENSE within the installation directory of the SOFTWARE.
- 
- - Autolink-Java, copyright by Robin Stocker (https://github.com/robinst/autolink-java). The corresponding license agreement (MIT) can be found at https://github.com/robinst/autolink-java/blob/master/LICENSE or in the file licenses/AUTOLINK-LICENSE within the installation directory of the SOFTWARE.
- - Bouncy Castle Crypto API, copyright by Legion of the Bouncy Castle Inc. (https://www.bouncycastle.org/java.html). The corresponding license agreement (MIT) can be found at http://www.bouncycastle.org/licence.html or in the file licenses/BOUNCY-CASTLE-LICENSE within the installation directory of the SOFTWARE.
- - EdDSA-Java, copyright by str4d (https://github.com/str4d/ed25519-java). The corresponding license agreement (Creative Commons Legal Code) can be found at https://github.com/str4d/ed25519-java/blob/master/LICENSE.txt or in the file licenses/EDDSA-JAVA-LICENSE within the installation directory of the SOFTWARE.
- - Flexmark-Java, copyright by Atlassian Pty Ltd and Vladimir Schneider (https://github.com/vsch/flexmark-java). The corresponding license agreement (BSD) can be found at https://github.com/vsch/flexmark-java/blob/master/LICENSE.txt or in the file licenses/FLEXMARK-LICENSE within the installation directory of the SOFTWARE.
- - JavaEWAH, copyright by Daniel Lemire and others (https://github.com/lemire/javaewah). The corresponding license agreement (Apache License 2.0) can be found at https://github.com/lemire/javaewah/blob/master/LICENSE or in the file licenses/JAVA-EWAH-LICENSE within the installation directory of the SOFTWARE.
- - JGit, copyright by various authors (http://www.eclipse.org/jgit/). The corresponding license agreement (Eclipse Distribution License 1.0) can be found at http://www.eclipse.org/org/documents/edl-v10.php or in the file licenses/JGIT-LICENSE within the installation directory of the SOFTWARE.
- - JMySpell, copyright by DreamTangerine (dreamtangerine@hotmail.com). The corresponding license agreement can be found in the file licenses/JMYSPELL-LICENSE within the installation directory of the SOFTWARE.
- - JNA, copyright by Timothy Wall and Wayne Meissner (https://github.com/twall/jna). The corresponding license agreement (LGPL 2.1) can be found at https://github.com/twall/jna/blob/master/LGPL2.1 or in the file licenses/JNA-LICENSE within the installation directory of the SOFTWARE.
- - JOpt-Simple, copyright by various authors (http://sourceforge.net/projects/jopt-simple/). The corresponding license agreement (MIT License) can be found at http://pholser.github.io/jopt-simple/license.html or in the file licenses/JOPT-SIMPLE-LICENSE within the installation directory of the SOFTWARE.
- - JSch, copyright by Atsuhiko Yamanaka, JCraft, Inc. (http://www.jcraft.com/jsch/). The corresponding license agreement (BSD) can be found at http://www.jcraft.com/jsch/LICENSE.txt or in the file licenses/JSCH-LICENSE within the installation directory of the SOFTWARE.
- - JSon-Simple, copyright by various authors (https://code.google.com/p/json-simple/). The corresponding license agreement (Apache License 2.0) can be found at https://code.google.com/p/json-simple/ or in the file licenses/JSON-SIMPLE-LICENSE within the installation directory of the SOFTWARE.
- - JZlib, copyright by ymnk, JCraft, Inc. (http://www.jcraft.com/jzlib/). The corresponding license agreement (LGPL) can be found at http://www.jcraft.com/jzlib/LICENSE.txt or in the file licenses/JZLIB-LICENSE within the installation directory of the SOFTWARE.
- 
- - LZ4 compression for Java, copyright by various authors (https://github.com/lz4/lz4-java/). The corresponding license agreement (LGPL) can be found at https://github.com/lz4/lz4-java/blob/master/LICENSE.txt or in the file licenses/LZ4J-LICENSE within the installation directory of the SOFTWARE.
- - SSHJ, copyright by various authors (https://github.com/hierynomus/sshj). The corresponding license agreement can be found at https://github.com/hierynomus/sshj/blob/master/LICENSE or in the file licenses/SSHJ-LICENSE within the installation directory of the SOFTWARE.
- - SLF4J, copyright by QOS.ch (https://www.slf4j.org). The corresponding license agreement (MIT) can be found at https://www.slf4j.org/license.html or in the file licenses/SLF4J-LICENSE within the installation directory of the SOFTWARE.
- - Snakeyaml, copyright by various authors (https://bitbucket.org/asomov/snakeyaml-engine/src/default/). The corresponding license agreement (Apache License 2.0) can be found at https://bitbucket.org/asomov/snakeyaml-engine/src/default/LICENSE.txt or in the file licenses/SNAKEYML-LICENSE within the installation directory of the SOFTWARE.
- - SWT, copyright by Eclipse (http://eclipse.org). The corresponding license agreement can be found at http://www.eclipse.org/legal/epl-v10.html or in the file licenses/SWT-LICENSE within the installation directory of the SOFTWARE.
- - SVNKit (including SQLJet), copyright by TMate Software s.r.o. (http://svnkit.com/). The corresponding license agreement (TMate License) can be found in the file licenses/SVNKIT-LICENSE within the installation directory of the SOFTWARE.
- - Trilead SSH API, Copyright (c) Trilead AG (http://www.trilead.com) and others. The corresponding license agreement can be found in the file licenses/TRILEAD-LICENSE within the installation directory of the SOFTWARE.
- 
-The licensee is obliged to abide by these third parties' license terms. Consequently, they are part of this license agreement.
diff --git a/docs/licenses/snyk.txt b/docs/licenses/snyk.txt
deleted file mode 100644
index 50592a8501..0000000000
--- a/docs/licenses/snyk.txt
+++ /dev/null
@@ -1,182 +0,0 @@
-https://snyk.io/policies/terms-of-service-2020/
- 
-Terms of Service 2020
- 
-April 8, 2020
- 
-Your attention is drawn to Section 5 (which includes certain disclaimers regarding the Snyk Service) and Section 9 (under which you give us the right to use data about your project for analytics and project improvement purposes).
- 
-Unless you or the organization to which you belong have entered into a separate agreement with us covering your subscription, these Terms of Service (together with the documents referred to in them) tell you the terms on which you may make use of the services which are made available to you via our software platform at https://snyk.io and our CLI tool (together, the “Platform”) (the “Services”), as well as all information and data made available to you in connection with the Services (“Service Data”).
- 
-If you have entered into a separate Enterprise Subscription agreement with us for your and your employees', contractors' and agents' use of the Services, then that agreement will apply instead of these Terms of Service.
- 
-Please read these Terms of Service carefully before you start to use the Services. We recommend that you print a copy of this for future reference.
- 
-By using the Services or installing our CLI tool, you are confirming that you accept these Terms of Service and that you agree to comply with them.
If you do not agree to these Terms of Service, you must not use the Services. - -Other applicable terms - -These Terms of Service refer to the following additional terms, which also apply to your use of the Services: - -Our Privacy Policy, which sets out the terms on which we process any personal data we collect from you, or that you provide to us. -Our Acceptable Use Policy, which sets out the permitted uses and prohibited uses of our Services, including any material and data transmitted using the Services. -Our Data Processing Addendum, which describes how we will process any personal data on your behalf. -Our Cookies Policy, which sets out information about the cookies on the Platform. - -1. Introduction - -Snyk ** (“Snyk”, “us”, “our” or “we”) is the provider of the Services and operator of the Platform. - -2. Changes to these terms - -We may revise these Terms of Service at any time by amending this page. Where appropriate (for example, where the changes limit your rights or increase your obligations), we will also give you notice of the changes by sending you an email – where possible, a reasonable time in advance. The process for changes to fees and payment plans is set out separately in clause 8. If you are using the Services as an anonymous user (and therefore on a session by session basis), the version of these Terms of Service which is in force at the beginning of a particular session will apply to your use of the Services during that particular session. We will not notify you of changes. - -Please check this page from time to time and give careful consideration to any emails we send you, as the changes to the Terms of Service will be binding on you. - -3. User account - -If you register for an account on the Platform (including via third party authentication), you are responsible for any use of our Services with your account details, and for protecting your account details from unauthorised use. You are also responsible for the security of any computer from which you sign in to your account. - -You may also use our Services as an anonymous user, in which case no account is necessary. - -In the case of a company, you hereby warrant that you have authority to bind the company (as the contracting party) to these Terms of Service. In the case of an individual, you hereby warrant that you are 18 years of age or older or, where you are not, that you have the consent of your parent or guardian to use the Services in accordance with these Terms of Service. - -4. Rights we grant you - -We grant you a non-exclusive, non-transferable, revocable right to install the CLI tool and make use of the Services and to make use of the documentation available at https://snyk.io/docs (“Documentation”) and Service Data, solely for your internal business operations and in accordance with these Terms of Service. - -This licence shall remain in effect until and unless these Terms of Service are terminated by you or us (see clause 13 for information about termination). You promise and agree that you will only make use of the Services, Documentation and Service Data within the scope of this licence and limits, terms and conditions set out in these Terms of Service, and that you will not redistribute or transfer the Services, Documentation or Service Data, or any part of them. 
- 
-You acknowledge that all intellectual property rights in the Services, the Documentation, the Service Data and the Platform anywhere in the world belong to us or our licensors (even after installation onto a computer owned by you or integration into your system), and that you have no rights in or to the Services, Documentation, Service Data or the Platform other than the right to use each of them in accordance with the terms of these Terms of Service.
- 
-In the event that you breach the terms of the licence granted to you herein and such breach results in the creation of derivative works of the Services, the Documentation, and/or the Service Data (“Improvements”), you hereby assign with full title guarantee all such Improvements to us. Such assignment does not preclude us from taking any legal or other action against you for contravention of these terms and conditions, including for infringement of our intellectual property rights.
- 
-You may from time to time provide suggestions, comments, ideas or other feedback (“Feedback”) to us with respect to the Services, Service Data, Platform or Documentation. To the extent that you provide such Feedback, you grant us an unlimited right and licence to use, disclose, reproduce, license or otherwise distribute and exploit the Feedback as we see fit, entirely without obligation or payment to you or restriction of any kind.
- 
-5. Services
- 
-From time to time, we may introduce new services, features or functionality to the Services. These Terms of Service will apply to such new services, features or functionality, unless they come with separate or additional terms, in which case you will be required to agree to such separate or additional terms before being permitted to use the new services, features or functionality.
- 
-We undertake that the Services will be performed substantially in accordance with the Documentation. This undertaking shall not apply to the extent of any non-conformance which is caused by your use of the Services contrary to our instructions or these Terms of Service, or any alteration or modification made to the Services or the software used in the provision of the Services by a third party who is not authorised by us. You understand and agree that we have no obligation to modify software to support your use of the Services.
- 
-You acknowledge that the accuracy and completeness of the Services is dependent on a number of factors outside the control of Snyk, including the design, implementation, and use of your project, erroneous dependency or vulnerability data, and changes to the environment in which your project is used.
- 
-We do not warrant that:
--the Services will be able to find and monitor all vulnerabilities in all dependencies (including open source dependencies) included or used by your application or code. Whilst we endeavour to keep our open source vulnerability database up to date and to build on it, you acknowledge that it does not provide any legal or other professional advice in relation to the Services and that we do not guarantee it is a complete source of all vulnerabilities and license issues for all dependencies or that it is relevant or suited to all the dependencies included or used by your code or applications;
--we will be able to fix all vulnerabilities discovered using the Services; or
--a patch or recommended version upgrade will not break the functionality of your code or will not result in the introduction of new vulnerabilities.
We take care when authoring patches and test all patches before making them available to you as part of the Services; however, you acknowledge that it is your responsibility to assess the impact of a patch before using it.
-You further acknowledge that the suggestions made by us in relation to fixes (whether for updates, patches or monitoring services) are provided for general information only, and have not been made with your particular requirements in mind. They are therefore not intended to amount to advice on which you should solely rely.
- 
-We will not be liable to you for our failure to find, fix and monitor dependencies, or for any damage or loss suffered as a result of a fix deployed.
- 
-6. Accessing the Services
- 
-You acknowledge that the extent of your use of the Services will depend on your subscription plan, and you further agree to only use the Services within the limits of such subscription plan. The default subscription plan is the ‘Free Plan’. Details about the Services available under each subscription plan can be found here: https://snyk.io/plans. If, at any time whilst using the Services, you exceed the limit of your subscription plan, we will charge you, and you will pay, the relevant fees which apply to such excess use as set out above. We will use our reasonable endeavours to notify you when the limit is close to being exceeded; however, we accept no liability for failure to do this. You are solely responsible for monitoring your use of the Services within the permitted limits.
- 
-Whilst we will make reasonable efforts to ensure the Services are operational 24 hours a day, 7 days a week, we do not guarantee that the Services will always be available or be uninterrupted. In particular, but without limitation:
- 
-Maintenance Services: The Services will not be available to you when we carry out maintenance services. We will endeavour to carry out these services outside of normal business hours (being 9:00am to 5:00pm UK time) and to give you at least three hours' notice in writing (via email where possible); however you acknowledge that this may not be possible in cases of urgency.
-Communication networks: The Services may be subject to limitations, delays and other problems inherent in the use of communication networks and facilities.
-We will not be liable to you if the Services are unavailable at any time, or for any period, due to an event or cause outside of our control.
- 
-We reserve the right to suspend your access to or use of the Services without notice in the event you breach these Terms of Service or if we reasonably suspect that you have breached these Terms of Service.
- 
-7. Using the Services
- 
-You must not use the Services for any commercial use (other than for internal use within your business), and you must not redistribute or transfer the Services, Platform, Documentation or Service Data to any third party or make any part of the Services, Documentation or Service Data available to be accessed, in whole or in part, by any third party.
- -The licence granted to you to install and make use of the Services, the Platform, the Documentation, and Service Data, does not permit you to do, and you shall not do nor permit any third party to do, any of the following: - -- Embed our Services, Platform, Documentation, or Service Data into any product of yours or any third party; -- Make available through automated or manual means any part of the Services, the Platform, the Documentation, or the Service Data, by way of crawling, scraping, spidering or otherwise; -- Copy or access all or any part of the Services, the Platform, the Documentation, or the Service Data other than via the interface(s) provided to you by us; -- Use web-crawlers, bots, or scripts to copy or access any part of the Services, the Documentation or the Service Data; -- Circumvent or attempt to override any security features we have installed around the Services, the Platform, the Documentation, or the Service Data; or -- Copy in part or in whole, our database of vulnerabilities. - -You further agree to comply with the Acceptable Use Policy at https://snyk.io/policies/acceptable-use-policy with regards to your use of the Services, Platform, Service Data and Documentation, including any material and data you transmit using the Services. This Acceptable Use Policy is hereby incorporated into these Terms of Service. - -A breach of the Acceptable Use Policy will constitute a breach of these Terms of Service, and may result in termination or suspension of your account in accordance with these Terms of Service. - -You shall comply with all applicable laws, rules and regulations that apply to your use of the Services, and comply with all applicable laws, rules and regulations governing export that apply to the Services. - -8. Purchases - -If you choose a paid-subscription plan, you agree to pay us fees in accordance with the relevant pricing plan. Details of those fees are set out on our Pricing Page at https://snyk.io/plans (which do not include VAT). - -Depending on the pricing plan chosen by you, our third party payment processor will (and you hereby authorise it to) bill your payment card for the applicable fee in advance on or shortly after the date you subscribe for a paid plan and each month or anniversary thereafter, until terminated by you or us. The fees are non-refundable, except as expressly stated otherwise in these Terms of Service. - -If you move to a higher tier of a paid plan, the change will take effect immediately and we will charge you for the additional fees associated with the new paid plan on a pro-rata basis. If you move to a lower tier of a paid plan, the change will take effect in the next billing cycle. You acknowledge that you will not receive a refund for the then-current billing cycle if you move to a lower tier of a paid plan, or to a non-payment subscription plan. - -We reserve the right not to provide you with the Services until the relevant fee has been received in full and cleared funds. - -We also reserve the right to change our fees or payment plans at any time. If you do not agree to such change, you must ask us to delete your account via email to support@snyk.io and stop using the Services within 30 days of the date the new fee or payment plan becomes effective, at which point these Terms of Service will be deemed to have been terminated by you. We will only charge you in respect of the period before termination and based on the old fee or payment plan. 
If you do agree to such change (which will be deemed from your continued use of the Services after the date the new fee or payment plan becomes effective), your next bill will include the new fees on a pro rata basis.
- 
-You will pay fees without any set-off, counterclaim, deduction or withholding of any kind, except as may be required by law. If any withholding or deduction is required by law, you will, when making the payment to which the withholding or deduction relates, pay to us such additional amount as will ensure that we receive the same total amount that we would have received if no such withholding or deduction had been required.
- 
-9. Project Data and Rights you give us
- 
-We claim no intellectual property rights in and to your applications, project or any material you provide or otherwise transmit to us via the Service.
- 
-However, to enable your use of the Services, we do need to inspect portions of your project and send parts of it to our servers. This information includes, but is not limited to, information relating to the project (such as the project name and metadata), information relating to the dependencies being used (including open source and closed source dependencies locally available to our tool) and how they are referenced by the project, Snyk-related files and environmental information (“Project Information”). We may also collect Project Information for each of the project's dependencies (“Dependency Information”). For these purposes, we require, and you hereby grant us, a worldwide, non-exclusive, royalty-free licence to store, use, reproduce, display and transmit the Project Information, the Dependency Information and any other materials transmitted via the Service to the extent necessary to enable your use of the Services, including monitoring services. This licence shall remain in effect until and unless these Terms of Service are terminated by you or us.
- 
-In addition to the rights granted to us above, we also require, and you hereby grant to us, a licence to store, use, reproduce, display and transmit the Project Information, the Dependency Information and any other materials transmitted via the Service for analytical purposes (for example, so that we can see what stage the project was in when it was deleted) and to improve our Services. This licence shall remain in effect unless and until you email us at support@snyk.io and expressly ask us to delete such data from our database. For the avoidance of doubt, this licence will not end upon termination of these Terms of Service or where you delete the relevant project on the project page of the Platform.
- 
-10. Usage Data Analysis
- 
-Our CLI tool reports to us an event for each command you issue, including, but not limited to, the version of the CLI tool, the versions of surrounding tools such as node and npm, the Snyk User and organization ID, the arguments and inputs provided to the CLI, and details about duration, success and failure of CLI actions. This information is used by us for analytical purposes and to improve our Services. It allows us to better understand how the CLI tool is used, and informs our product development decisions.
- 
-If you would like to opt out of this, you can do so by setting the disable-analytics configuration item, as explained in our FAQ page at https://snyk.io/docs/faqs/#using-snyk.
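
For reference, the opt-out described in clause 10 can be applied either persistently, through the CLI's configuration store, or per invocation, through an environment variable. A minimal Python sketch, assuming the 'disable-analytics' key named above and a 'SNYK_DISABLE_ANALYTICS' environment variable taken from Snyk's documentation; both names may have changed since these terms were published, so check the linked FAQ:

    import os
    import subprocess

    # Persistent opt-out: write the 'disable-analytics' key into the CLI's
    # own config store, per the FAQ linked above.
    subprocess.run(['snyk', 'config', 'set', 'disable-analytics=1'], check=True)

    # Per-invocation opt-out: the variable name is an assumption from
    # Snyk's docs, not a term of this agreement.
    env = dict(os.environ, SNYK_DISABLE_ANALYTICS='1')
    subprocess.run(['snyk', 'test'], env=env, check=False)

As written, the opt-out covers only the usage analytics of clause 10; the project-data licences granted under clause 9 are unaffected by it.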
11. Limitation of Liability
- 
-You agree to the following limitations on our liability to you:
- 
-- Exclusion of certain losses: We shall not be liable to you for any loss of profits, business, anticipated savings, goodwill or business opportunity, business interruption, loss or corruption of data or information, or for any special, indirect or consequential loss or damage, howsoever arising under these Terms of Service; and
-- Cap on liability: Our maximum aggregate liability to you in contract, tort (including negligence or breach of statutory duty), misrepresentation, restitution or otherwise, arising in connection with the performance or contemplated performance of these Terms of Service shall be limited to the higher of either: i) the total amount paid to us in the 12-month period preceding the date on which the claim arose; or ii) £100.
- Further, due to the nature of the Services, we do not take responsibility for any damage caused by errors or omissions in any content, information, instructions or scripts provided by you to us in connection with the Services, or any action taken by us (or not taken by us) at your direction.
- 
-Nothing in these Terms of Service shall exclude either party's liability for death or personal injury caused by negligence, fraud or fraudulent misrepresentation or any other liability that cannot be excluded or limited by law.
- 
-All warranties, conditions, representations or other terms implied by statute or common law in relation to the Services, Documentation, the Service Data and the Platform provided by us are excluded to the fullest extent permitted by law.
- 
-12. Indemnity
- 
-You shall defend, indemnify and hold harmless Snyk against claims, actions, proceedings, losses, damages, expenses and costs (including without limitation court costs and reasonable legal fees) arising out of or in connection with your use of the Services, Documentation, Service Data or Platform in breach of these Terms of Service or other agreements referred to in these Terms of Service (including, but not limited to, the acceptable use policy https://snyk.io/policies#acceptable-use-policy).
- 
-13. Termination
- 
-You may terminate these Terms of Service with us at any time, for any reason, by asking us to delete your account via email to support@snyk.io (as applicable) and ceasing all use of the Services, Documentation and Service Data.
- 
-We may terminate these Terms of Service with you immediately, without notice, in the event you commit a material or persistent breach of these Terms of Service (including the Acceptable Use Policy), the Services are discontinued, we lose the right to provide you with the Services, or where the provision of the Services becomes unlawful.
- 
-Upon termination:
- 
-- all rights granted to you under these Terms of Service, including the licence in clause 4, shall cease;
-- you must cease all activities authorised by these Terms of Service, including use of the Services, Documentation and Service Data;
-- all fees payable to us under these Terms of Service shall become due and shall be billed immediately, despite any other provision; and
-- you must immediately uninstall, delete or remove from all computer equipment in your possession or control, and destroy or return to us all copies of, any software used in the provision of the Services including our CLI tool.
14. Other important terms
- 
-Assignment and other dealings: You may not assign, transfer, sub-licence or deal in any other manner with any or all of your rights under these Terms of Service, without our prior written consent.
- 
-Waiver: A waiver of any right or remedy under these Terms of Service or by law is only effective if given in writing and shall not be deemed a waiver of any subsequent breach or default. A failure or delay by a party to exercise any right or remedy provided under these Terms of Service or by law shall not constitute a waiver of that or any other right or remedy, nor shall it prevent or restrict any further exercise of that or any other right or remedy.
- 
-Severance: If any provision or part-provision of these Terms of Service is or becomes invalid, illegal or unenforceable, it shall be deemed modified to the minimum extent necessary to make it valid, legal and enforceable. If such modification is not possible, the relevant provision or part-provision shall be deemed deleted. Any such modification to or deletion of a provision or part-provision shall not affect the validity and enforceability of the rest of these Terms of Service.
- 
-Relationship: Nothing in these Terms of Service is intended to, or shall be deemed to, establish any partnership or joint venture between any of the parties or constitute any party the agent of another party.
- 
-Third Party Rights: No one other than a party to these Terms of Service, their successors and permitted assignees, shall have any right to enforce any of its terms.
- 
-Entire Agreement: These Terms of Service, and all documents referred to in them, constitute the entire agreement between the parties and supersede and extinguish all previous agreements, promises, assurances, warranties, representations and understandings between them, whether written or oral, relating to their subject matter.
- 
-Jurisdiction / Governing Law: These Terms of Service, their subject matter and their formation (and any non-contractual disputes or claims) are governed by English law. We both agree to the exclusive jurisdiction of the courts of England and Wales.
- 
-15. Contact us
- 
-To contact us, or if you are experiencing problems with the Services, please email support@snyk.io.
- 
-** If you are based in the United States, Snyk shall mean: Snyk, Inc., having a place of business at 100 Summer Street, Boston, MA 02110.
- 
-If you are based outside of the United States, Snyk shall mean: Snyk Ltd, having its registered place of business at Highlands House, Basingstoke Road, Spencers Wood, Reading, Berkshire, RG7 1NT United Kingdom. Our VAT number is 227200547.
diff --git a/docs/licenses/ssh.txt b/docs/licenses/ssh.txt
deleted file mode 100644
index a188b70200..0000000000
--- a/docs/licenses/ssh.txt
+++ /dev/null
@@ -1,187 +0,0 @@
-https://cvsweb.openbsd.org/src/usr.bin/ssh/LICENCE?rev=HEAD
- 
-This file is part of the OpenSSH software.
- 
-The licences which components of this software fall under are as
-follows. First, we will summarize and say that all components
-are under a BSD licence, or a licence more free than that.
- 
-OpenSSH contains no GPL code.
- 
-1)
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose.
Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- 
- [Tatu continues]
- * However, I am not implying to give any licenses to any patents or
- * copyrights held by third parties, and the software includes parts that
- * are not under my direct control. As far as I know, all included
- * source code is used in accordance with the relevant license agreements
- * and can be used freely for any purpose (the GNU license being the most
- * restrictive); see below for details.
- 
- [However, none of that term is relevant at this point in time. All of
- these restrictively licenced software components which he talks about
- have been removed from OpenSSH, i.e.,
- 
- - RSA is no longer included, found in the OpenSSL library
- - IDEA is no longer included, its use is deprecated
- - DES is now external, in the OpenSSL library
- - GMP is no longer used, and instead we call BN code from OpenSSL
- - Zlib is now external, in a library
- - The make-ssh-known-hosts script is no longer included
- - TSS has been removed
- - MD5 is now external, in the OpenSSL library
- - RC4 support has been replaced with ARC4 support from OpenSSL
- - Blowfish is now external, in the OpenSSL library
- 
- [The licence continues]
- 
- Note that any information and cryptographic algorithms used in this
- software are publicly available on the Internet and at any major
- bookstore, scientific library, and patent office worldwide. More
- information can be found e.g. at "http://www.cs.hut.fi/crypto".
- 
- The legal status of this program is some combination of all these
- permissions and restrictions. Use only at your own responsibility.
- You will be responsible for any legal consequences yourself; I am not
- making any claims whether possessing or using this is legal or not in
- your country, and I am not taking any responsibility on your behalf.
- 
- 
- NO WARRANTY
- 
- BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
- FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
- OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
- PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
- OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
- TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
- PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
- REPAIR OR CORRECTION.
- 
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
- WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
- REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
- INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
- OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
- TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
- YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
- PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGES.
- 
-3)
- ssh-keyscan was contributed by David Mazieres under a BSD-style
- license.
- 
- * Copyright 1995, 1996 by David Mazieres <dm@lcs.mit.edu>.
- * - * Modification and redistribution in source and binary forms is - * permitted provided that due credit is given to the author and the - * OpenBSD project by leaving this copyright notice intact. - -4) - The Rijndael implementation by Vincent Rijmen, Antoon Bosselaers - and Paulo Barreto is in the public domain and distributed - with the following license: - - * @version 3.0 (December 2000) - * - * Optimised ANSI C code for the Rijndael cipher (now AES) - * - * @author Vincent Rijmen - * @author Antoon Bosselaers - * @author Paulo Barreto - * - * This code is hereby placed in the public domain. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -5) - One component of the ssh source code is under a 3-clause BSD license, - held by the University of California, since we pulled these parts from - original Berkeley code. - - * Copyright (c) 1983, 1990, 1992, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- -6) - Remaining components of the software are provided under a standard - 2-term BSD licence with the following names as copyright holders: - - Markus Friedl - Theo de Raadt - Niels Provos - Dug Song - Aaron Campbell - Damien Miller - Kevin Steves - Daniel Kouril - Wesley Griffin - Per Allansson - Nils Nordman - Simon Wilkinson - - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ------- -$OpenBSD: LICENCE,v 1.20 2017/04/30 23:26:16 djm Exp $ diff --git a/docs/licenses/terraform.txt b/docs/licenses/terraform.txt deleted file mode 100644 index b6b0c2c8d0..0000000000 --- a/docs/licenses/terraform.txt +++ /dev/null @@ -1,355 +0,0 @@ -https://github.com/hashicorp/terraform/blob/main/LICENSE - -Mozilla Public License, version 2.0 - -1. Definitions - -1.1. “Contributor” - - means each individual or legal entity that creates, contributes to the - creation of, or owns Covered Software. - -1.2. “Contributor Version” - - means the combination of the Contributions of others (if any) used by a - Contributor and that particular Contributor’s Contribution. - -1.3. “Contribution” - - means Covered Software of a particular Contributor. - -1.4. “Covered Software” - - means Source Code Form to which the initial Contributor has attached the - notice in Exhibit A, the Executable Form of such Source Code Form, and - Modifications of such Source Code Form, in each case including portions - thereof. - -1.5. “Incompatible With Secondary Licenses” - means - - a. that the initial Contributor has attached the notice described in - Exhibit B to the Covered Software; or - - b. that the Covered Software was made available under the terms of version - 1.1 or earlier of the License, but not also under the terms of a - Secondary License. - -1.6. “Executable Form” - - means any form of the work other than Source Code Form. - -1.7. “Larger Work” - - means a work that combines Covered Software with other material, in a separate - file or files, that is not Covered Software. - -1.8. “License” - - means this document. - -1.9. “Licensable” - - means having the right to grant, to the maximum extent possible, whether at the - time of the initial grant or subsequently, any and all of the rights conveyed by - this License. - -1.10. “Modifications” - - means any of the following: - - a. 
any file in Source Code Form that results from an addition to, deletion - from, or modification of the contents of Covered Software; or - - b. any new file in Source Code Form that contains any Covered Software. - -1.11. “Patent Claims” of a Contributor - - means any patent claim(s), including without limitation, method, process, - and apparatus claims, in any patent Licensable by such Contributor that - would be infringed, but for the grant of the License, by the making, - using, selling, offering for sale, having made, import, or transfer of - either its Contributions or its Contributor Version. - -1.12. “Secondary License” - - means either the GNU General Public License, Version 2.0, the GNU Lesser - General Public License, Version 2.1, the GNU Affero General Public - License, Version 3.0, or any later versions of those licenses. - -1.13. “Source Code Form” - - means the form of the work preferred for making modifications. - -1.14. “You” (or “Your”) - - means an individual or a legal entity exercising rights under this - License. For legal entities, “You” includes any entity that controls, is - controlled by, or is under common control with You. For purposes of this - definition, “control” means (a) the power, direct or indirect, to cause - the direction or management of such entity, whether by contract or - otherwise, or (b) ownership of more than fifty percent (50%) of the - outstanding shares or beneficial ownership of such entity. - - -2. License Grants and Conditions - -2.1. Grants - - Each Contributor hereby grants You a world-wide, royalty-free, - non-exclusive license: - - a. under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or as - part of a Larger Work; and - - b. under Patent Claims of such Contributor to make, use, sell, offer for - sale, have made, import, and otherwise transfer either its Contributions - or its Contributor Version. - -2.2. Effective Date - - The licenses granted in Section 2.1 with respect to any Contribution become - effective for each Contribution on the date the Contributor first distributes - such Contribution. - -2.3. Limitations on Grant Scope - - The licenses granted in this Section 2 are the only rights granted under this - License. No additional rights or licenses will be implied from the distribution - or licensing of Covered Software under this License. Notwithstanding Section - 2.1(b) above, no patent license is granted by a Contributor: - - a. for any code that a Contributor has removed from Covered Software; or - - b. for infringements caused by: (i) Your and any other third party’s - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - - c. under Patent Claims infringed by Covered Software in the absence of its - Contributions. - - This License does not grant any rights in the trademarks, service marks, or - logos of any Contributor (except as may be necessary to comply with the - notice requirements in Section 3.4). - -2.4. Subsequent Licenses - - No Contributor makes additional grants as a result of Your choice to - distribute the Covered Software under a subsequent version of this License - (see Section 10.2) or under the terms of a Secondary License (if permitted - under the terms of Section 3.3). - -2.5. 
Representation - - Each Contributor represents that the Contributor believes its Contributions - are its original creation(s) or it has sufficient rights to grant the - rights to its Contributions conveyed by this License. - -2.6. Fair Use - - This License is not intended to limit any rights You have under applicable - copyright doctrines of fair use, fair dealing, or other equivalents. - -2.7. Conditions - - Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in - Section 2.1. - - -3. Responsibilities - -3.1. Distribution of Source Form - - All distribution of Covered Software in Source Code Form, including any - Modifications that You create or to which You contribute, must be under the - terms of this License. You must inform recipients that the Source Code Form - of the Covered Software is governed by the terms of this License, and how - they can obtain a copy of this License. You may not attempt to alter or - restrict the recipients’ rights in the Source Code Form. - -3.2. Distribution of Executable Form - - If You distribute Covered Software in Executable Form then: - - a. such Covered Software must also be made available in Source Code Form, - as described in Section 3.1, and You must inform recipients of the - Executable Form how they can obtain a copy of such Source Code Form by - reasonable means in a timely manner, at a charge no more than the cost - of distribution to the recipient; and - - b. You may distribute such Executable Form under the terms of this License, - or sublicense it under different terms, provided that the license for - the Executable Form does not attempt to limit or alter the recipients’ - rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - - You may create and distribute a Larger Work under terms of Your choice, - provided that You also comply with the requirements of this License for the - Covered Software. If the Larger Work is a combination of Covered Software - with a work governed by one or more Secondary Licenses, and the Covered - Software is not Incompatible With Secondary Licenses, this License permits - You to additionally distribute such Covered Software under the terms of - such Secondary License(s), so that the recipient of the Larger Work may, at - their option, further distribute the Covered Software under the terms of - either this License or such Secondary License(s). - -3.4. Notices - - You may not remove or alter the substance of any license notices (including - copyright notices, patent notices, disclaimers of warranty, or limitations - of liability) contained within the Source Code Form of the Covered - Software, except that You may alter any license notices to the extent - required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - - You may choose to offer, and to charge a fee for, warranty, support, - indemnity or liability obligations to one or more recipients of Covered - Software. However, You may do so only on Your own behalf, and not on behalf - of any Contributor. You must make it absolutely clear that any such - warranty, support, indemnity, or liability obligation is offered by You - alone, and You hereby agree to indemnify every Contributor for any - liability incurred by such Contributor as a result of warranty, support, - indemnity or liability terms You offer. You may include additional - disclaimers of warranty and limitations of liability specific to any - jurisdiction. - -4. 
Inability to Comply Due to Statute or Regulation - - If it is impossible for You to comply with any of the terms of this License - with respect to some or all of the Covered Software due to statute, judicial - order, or regulation then You must: (a) comply with the terms of this License - to the maximum extent possible; and (b) describe the limitations and the code - they affect. Such description must be placed in a text file included with all - distributions of the Covered Software under this License. Except to the - extent prohibited by statute or regulation, such description must be - sufficiently detailed for a recipient of ordinary skill to be able to - understand it. - -5. Termination - -5.1. The rights granted under this License will terminate automatically if You - fail to comply with any of its terms. However, if You become compliant, - then the rights granted under this License from a particular Contributor - are reinstated (a) provisionally, unless and until such Contributor - explicitly and finally terminates Your grants, and (b) on an ongoing basis, - if such Contributor fails to notify You of the non-compliance by some - reasonable means prior to 60 days after You have come back into compliance. - Moreover, Your grants from a particular Contributor are reinstated on an - ongoing basis if such Contributor notifies You of the non-compliance by - some reasonable means, this is the first time You have received notice of - non-compliance with this License from such Contributor, and You become - compliant prior to 30 days after Your receipt of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent - infringement claim (excluding declaratory judgment actions, counter-claims, - and cross-claims) alleging that a Contributor Version directly or - indirectly infringes any patent, then the rights granted to You by any and - all Contributors for the Covered Software under Section 2.1 of this License - shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user - license agreements (excluding distributors and resellers) which have been - validly granted by You or Your distributors under this License prior to - termination shall survive termination. - -6. Disclaimer of Warranty - - Covered Software is provided under this License on an “as is” basis, without - warranty of any kind, either expressed, implied, or statutory, including, - without limitation, warranties that the Covered Software is free of defects, - merchantable, fit for a particular purpose or non-infringing. The entire - risk as to the quality and performance of the Covered Software is with You. - Should any Covered Software prove defective in any respect, You (not any - Contributor) assume the cost of any necessary servicing, repair, or - correction. This disclaimer of warranty constitutes an essential part of this - License. No use of any Covered Software is authorized under this License - except under this disclaimer. - -7. 
Limitation of Liability - - Under no circumstances and under no legal theory, whether tort (including - negligence), contract, or otherwise, shall any Contributor, or anyone who - distributes Covered Software as permitted above, be liable to You for any - direct, indirect, special, incidental, or consequential damages of any - character including, without limitation, damages for lost profits, loss of - goodwill, work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses, even if such party shall have been - informed of the possibility of such damages. This limitation of liability - shall not apply to liability for death or personal injury resulting from such - party’s negligence to the extent applicable law prohibits such limitation. - Some jurisdictions do not allow the exclusion or limitation of incidental or - consequential damages, so this exclusion and limitation may not apply to You. - -8. Litigation - - Any litigation relating to this License may be brought only in the courts of - a jurisdiction where the defendant maintains its principal place of business - and such litigation shall be governed by laws of that jurisdiction, without - reference to its conflict-of-law provisions. Nothing in this Section shall - prevent a party’s ability to bring cross-claims or counter-claims. - -9. Miscellaneous - - This License represents the complete agreement concerning the subject matter - hereof. If any provision of this License is held to be unenforceable, such - provision shall be reformed only to the extent necessary to make it - enforceable. Any law or regulation which provides that the language of a - contract shall be construed against the drafter shall not be used to construe - this License against a Contributor. - - -10. Versions of the License - -10.1. New Versions - - Mozilla Foundation is the license steward. Except as provided in Section - 10.3, no one other than the license steward has the right to modify or - publish new versions of this License. Each version will be given a - distinguishing version number. - -10.2. Effect of New Versions - - You may distribute the Covered Software under the terms of the version of - the License under which You originally received the Covered Software, or - under the terms of any subsequent version published by the license - steward. - -10.3. Modified Versions - - If you create software not governed by this License, and you want to - create a new license for such software, you may create and use a modified - version of this License if you rename the license and remove any - references to the name of the license steward (except to note that such - modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses - If You choose to distribute Source Code Form that is Incompatible With - Secondary Licenses under the terms of this version of the License, the - notice described in Exhibit B of this License must be attached. - -Exhibit A - Source Code Form License Notice - - This Source Code Form is subject to the - terms of the Mozilla Public License, v. - 2.0. If a copy of the MPL was not - distributed with this file, You can - obtain one at - http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular file, then -You may include the notice in a location (such as a LICENSE file in a relevant -directory) where a recipient would be likely to look for such a notice. 
- -You may add additional accurate notices of copyright ownership. - -Exhibit B - “Incompatible With Secondary Licenses” Notice - - This Source Code Form is “Incompatible - With Secondary Licenses”, as defined by - the Mozilla Public License, v. 2.0. diff --git a/docs/licenses/viscosity.txt b/docs/licenses/viscosity.txt deleted file mode 100644 index 8d111c8ec2..0000000000 --- a/docs/licenses/viscosity.txt +++ /dev/null @@ -1,198 +0,0 @@ -https://www.sparklabs.com/company/legal/ - -Viscosity End User License Agreement (EULA) - -By installing, operating, distributing, or otherwise using Viscosity you agree to the terms of this End User License Agreement. If you do not agree to the terms of this License Agreement, do not install or use Viscosity. - -1. LICENSE. Your rights and obligations with respect to the use of Viscosity are as follows. You may: (a) make copies of Viscosity for archival purposes, or copy Viscosity onto the hard disk of your computer and retain the original for archival purposes; and (b) use Viscosity in accordance with any additional permitted uses set forth below. You may not: (a) sublicense, rent, or lease any portion of Viscosity; reverse engineer, decompile, disassemble, modify, translate, make any attempt to discover the source code of Viscosity, or create derivative works from Viscosity; (b) use Viscosity in any manner not authorized by this license; nor (c) use Viscosity in any manner that contradicts any additional restrictions set forth below. - -2. EVALUATION. Viscosity is not free software. You may use this software for evaluation purposes without charge for a period of 30 days. To continue using the software beyond the 30 day evaluation period, you must register it. - -3. REGISTRATION. (a) Individual Registration. If you have purchased an individual registration, also known as a "single user" and "single person" registration, you may install and register Viscosity on up to three computers, provided that you are the sole user of Viscosity on these computer/s. (b) Volume Registration. A company or other organization may purchase a volume registration, also known as a "volume license" and a "site license". A volume registration permits either (i) Viscosity to be installed and registered on multiple computers owned or operated by the company or organization, where each installation counts as an allocated seat; or (ii) Viscosity to be installed and registered by a number of users, where each user counts as an allocated seat, and each user may install and register Viscosity on up to three computers owned or operated by the company or organization provided that they are the sole user of Viscosity on these computer/s. As long as the total number of seat allocations do not exceed the registered number of seats, you may: (i) distribute Viscosity to users as governed by section 4 below; and (ii) allow an employee to install and register Viscosity on their personal computer for use in the course of their employment, where the user or their computer is as an allocated seat. (c) Server-Locked Registration. 
If you have purchased a server-locked registration, also known as a "server-locked license", as long as the total number of seat allocations do not exceed the registered number of seats, you may: (i) distribute Viscosity to users as governed by section 4 below; and (ii) allow any user associated with your company or organization, including, but not limited to, employees, students, parents, contractors, and visitors, to install and register Viscosity on their work or personal computer/s for the sole use of connecting to the server DNS or IP addresses associated with the registration. You may not have VPN server/s available at the DNS or IP addresses associated with the registration that are part of a commercial VPN service of which the registered users are customers. - -4. REDISTRIBUTION. You may provide unmodified copies of Viscosity under these conditions: (a) distribute only the unregistered version of Viscosity; (b) provide exact, unmodified copies of Viscosity including all accompanying files, this License Agreement, and all copyright, trademark, proprietary, and other notices; (c) reference the official Viscosity website of https://www.sparklabs.com/viscosity/; and (d) the total price of a compilation (such as a DVD-Disc) that includes Viscosity must be no more than $1 USD per package in the compilation. You may provide modified copies of Viscosity under these conditions: (a) Viscosity is only modified by bundling it to include connection files and/or preference settings; and (b) You have a Volume Registration or Server-Locked Registration and are distributing Viscosity only to users or machines covered by this registration. - -5. UPGRADES. Upgraded versions of Viscosity are announced on the Viscosity website. Upgrades may include additional features and/or bug fixes over previous releases. Registered users are entitled to receive free upgrades for all minor-point (1.x) version releases. - -6. SUPPORT. Support is available online via the Viscosity website and email under these conditions: (a) we guarantee that we will respond to the best of our ability, and as promptly as is reasonably possible, to all reasonable queries received from registered users; and (b) in the case of registrations that cover multiple users, we may ask that the organization designate one user as the contact point to receive support requests from other users and filter out duplicates. - -7. THIRD-PARTY SOFTWARE. Portions of the code included in or with Viscosity may contain, or may be derived from, third-party code, including without limitation, open source software. All use of third-party software is subject to and governed by the respective licenses for the third-party software. These licenses are available at the third-party licenses page (https://www.sparklabs.com/company/legal/thirdparty/) for the most recent version of the software, as well available as part of the software installation. Where a third-party license requires that the source code of the covered software or components be made available such as, but not limited to, the GNU General Public License, you may request a copy of the source code by following the offer instructions included with the third-party license. - -8. LIMITED WARRANTY AND LIMITATION OF REMEDIES. The software and documentation for Viscosity are offered AS IS and WITHOUT ANY WARRANTY express or implied, including but not limited to warranty of design, merchantability, or fitness for a particular purpose. 
In no event will SparkLabs be liable for any damages, including lost profits, lost savings, data theft, or other incidental or consequential damages, even if SparkLabs is advised of the possibility of such damages, or for any claim by you or any third party. - -9. ACKNOWLEDGMENT. You acknowledge that you have read this agreement, understand it, and agree to be bound by its terms and conditions. - -SparkLabs® and Viscosity® are registered trademarks of SparkLabs Pty Ltd. Any unauthorized use is expressly prohibited. - -Privacy Policy - -We at SparkLabs are committed to maintaining the trust and confidence of the users of our software and of visitors to our website. We strongly support the right to privacy, and where possible we prefer to avoid collecting data from you. - -Our Privacy Policy is designed to give a transparent overview of the cases where we do collect data, why we collect it, how it is used, and how long it is kept for. By using our website or products, you consent to our Privacy Policy. - -ONLINE STORE - -If you make a purchase of a product from our website your full name, company name (optional), address, and email address are collected. If the gift option is enabled, an additional full name, company name (optional) and email address are also collected to be used instead for billing information. - -This information is collected for a number of reasons: a) We are legally required to collect some or all of this information to assess taxation obligations (such as the collection of GST) and to meet record-keeping requirements; b) To verify you are the license owner in the event you contact us for support; c) To verify you are the license owner if you request we re-send your license information; d) To verify you are the license owner when upgrading an existing license; and e) To help prevent fraudulent purchases. - -We do not sell or provide any of your collected details to third-parties beyond what is required to process your payment. To process a payment, some of your billing details may be provided to the payment processor. Please see the Third-Party Data Processors section for further information. - -We do not collect or retain your credit card number, expiration date, or security code. These details are sent directly to the payment processor and do not pass through our servers or network. - -We retain data related to the purchase of a product as long as is legally required to meet record-keeping obligations under taxation law and related legislation. Beyond that, we may continue to retain purchase-related data while the product is still considered active by us. An active product is one that is available for purchase, or technical support is still available, or upgrades from the product are still offered. - -SUPPORT AND CORRESPONDENCE - -We provide the ability to get in contact with us for product support, feedback, and for other general correspondence. When contacting us we will collect the email address (for emails or forum posts), username (if contacting us via our website forum or Twitter), and any information you provide. This information is used to resolve your support inquiry. - -We may elect to delete support emails shortly after they are resolved if we believe they contain highly sensitive data, for example if an email contains the credentials needed to access a user's VPN server. Other support emails and correspondence are retained indefinitely, however you may request we remove support emails you have sent. 
- -PRODUCTS - -Our software products may need to contact our servers as part of their operation, such as when checking for an updated version, submitting a crash log, or checking the authenticity of a license. - -Update Checking - -By default, our products will periodically check to see if an updated version is available. To perform these checks our products will send, via an encrypted connection, the product name, currently installed version, the operating system name, and operating system version. - -Our products do not send any "system profiling" data (detailed information about your computer's system) as part of these checks. If desired, update checking can be disabled by using the appropriate setting in the product's preferences window. - -Further technical details regarding Update Checking can be found in our support section. - -We do not retain any data sent as part of an update check. However, data about the network request itself may be logged by the server (please see Server Logging below). - -Crash Logs - -If one of our products crash while using it, you'll be given the option to send along data about the crash to us. This is used to help us identify the cause of the crash, and where possible, fix it in a future update. Crash data is anonymised, you can view it prior to sending, and the report is sent via an encrypted connection. - -The submission of a crash report is optional, and you can choose not to send a crash report. You can optionally provide additional comments about the crash, as well as provide an email address. If providing an email address, it will only ever be used to contact you if we need to seek more information about the cause of the crash. - -If we are actively working to fix the cause of a crash, we may retain the crash report until the problem has been resolved. Otherwise crash reports are retained for up to 60 days. Data about the network request when sending the report may be logged by the server (please see Server Logging below). - -License Checks - -Our products may contact our servers to check that the license details used to register the product are valid. This may occur when entering your license details into the product, and occasionally repeated. - -A license check sends a secure irreversible hash of your license details over an encrypted connection, and the server will return either a valid or invalid status. Your actual license details are not sent. Like with update checks, our products do not collect or send any "system profiling" data as part of a license check. - -In addition, Viscosity will endeavour to perform these checks in a fashion that prevents monitoring for these checks by a malicious administrator or country level actor in an attempt to identify someone as a Viscosity user. Further technical details regarding License Checks can be found in our support section. - -We retain a log of license checks for 60 days. Data about the network request when sending the report may be logged by the server (please see Server Logging below). - -Core Operation - -Our products may need to send some private data as part of their core operation. For example, Viscosity may need to send your username and password to the VPN server you are connecting to as part of the authentication process, and subsequently send network traffic to the VPN server. Such data will be sent to the configured party, and not to us or any other third-party. 
It is your responsibility to be aware of the security and privacy implications for the VPN configurations you use (such as encryption options and network routes) and the VPN servers you connect to. - -Data Not Collected - -With the exception of the operations listed in the sections above, our products do not collect or send out any private data. For example, our products will not send any interactions (such as keyboard and mouse input), network traffic, usernames, passwords, settings, encryption keys, microphone or camera recordings, VPN connection names, VPN connection configuration data, or VPN traffic. - -EMAIL LISTS - -We maintain a low-volume email newsletter that you may subscribe to through our website. This newsletter is completely optional, and you will never be automatically subscribed. - -If you subscribe to our newsletter, we will retain your email address until you ask to be unsubscribed. Instructions on how to unsubscribe are included in all newsletter emails. - -SERVER LOGGING - -When you visit our website or interact with one of our servers via a network request, some basic data about the request is logged. This may include data such as your IP address, date and time, the URL or path of the resource or file accessed, browser/software information contained in HTTP/HTTPS request headers (such as your browser/software name and version and operating system information), and request status. - -We may also log additional user-entered data when using features of our website that allow for access to customer data, such as the Lost License and View Invoice features. This extra data is used to help protect against malicious users attempting to gain access, for example by using brute-force attacks. - -Server logs are typically only examined when diagnosing a server problem and to monitor for malicious access or use. Server logs may be kept for up to 90 days. - -THIRD-PARTY DATA PROCESSORS - -We use a small number of third-party service providers that may process your personal data. Who these are, and what they are used for, can be found below: - -* Credit card and Apple Pay processing for purchases and payments made using our website is provided by Braintree. Braintree are provided with your payment and billing details when making a payment. Privacy Policy. -* PayPal is used to accept PayPal and Discover card payments for purchases and payments made using our website. PayPal are provided with your billing details when making a payment. Privacy Policy. -* Coinbase Commerce is used to accept Bitcoin payments. Privacy Policy. -* Fraud prevention is performed by MaxMind. They are provided with your IP address and email address. Privacy Policy. -* Mailroute is used for spam email filtering. Emails you send to us will pass through their email filtering servers. Privacy Policy. -* CDN and DDoS protection services are provided by Cloudflare. When using our website your requests may be proxied through their servers. Privacy Policy. - -BACKUPS - -We keep archived backups of company data so that a data loss event, such as a significant hardware failure, doesn't have an adverse business impact. Archived backups have strictly limited access, they are not readily accessible, and they are not used in "production" environments. Backups of data will only be used in the event of data loss, and they are not otherwise accessed. - -Archived backups may be kept for up to 12 months. 
Archived data may persist in backups for up to this length of time even if it has otherwise been removed from our production data. - -RIGHTS UNDER GDPR - -European Union citizens may exercise their rights under the GDPR (General Data Protection Regulation), such as access to their personal information, by contacting us via email with their request. - -QUESTIONS - -This privacy policy may be updated or edited in the future. The most up-to-date information will always be available from our legal information page on our website. - -If you have any questions about our privacy policy or collection of data, please don't hesitate to get in touch via email with our support staff. - -GDPR Erasure Requests - -As outlined in our Privacy Policy, we may collect certain personal data from you when using our website or products, such as when purchasing one of our products, making a support request, or subscribing to our newsletter. Under Article 17 of the GDPR individual citizens of the European Union (EU) have the right to have certain personal data erased, commonly known as the "right to be forgotten". - -This document outlines what personal data we can erase, how to submit an erasure request if you're an EU citizen and you'd like certain personal data permanently removed, and the timeframe you can expect for it to be processed. - -Special Note: We are not your VPN Provider. We do not run or operate the VPN server/s you connect to using Viscosity. We have no knowledge, data, or logs regarding your VPN connection/s or the use of your VPN connection/s. - -DATA WE CAN'T ERASE - -Before submitting an erasure request it's important to realise that not all personal data can be erased. The GDPR outlines a number of circumstances where the right to erasure does not apply. - -In particular, we can only erase personal data if we no longer have a legal obligation to retain it, and that the data is no longer necessary for the purpose it was collected or processed. We have outlined where this applies and why below: - -* We cannot erase any financial, billing, purchase, or other associated data for store purchases, refunds, or other transactions that occurred during the current Australian financial year or the previous five (5) financial years. This is a legal obligation to comply with the record keeping provisions of the Australian Tax Office (ATO). -* We cannot erase purchase or billing data that is included on invoices for purchases less than ten (10) years old. All online store purchases have an invoice associated with them. Personal data on an invoice includes your name, address, and tax identifier (if applicable). This is a legal obligation to comply with the record keeping requirements of the EU's VAT Mini One Stop Shop (MOSS) scheme. -* We cannot erase personal data which wasn't collected or stored by us (for example, if you have created an account with PayPal, we cannot erase the data they directly collected from you). -* We cannot erase data which isn't considered personal data. This includes non-identifying data we have generated, such as product serial numbers. - -Please note that this is not an exhaustive list. When processing a request we may find there is a legal or other obligation that prevents erasure of certain data. You will be notified if this is the case. 
- -DATA WE CAN ERASE - -The following is a list of personal data we collect: - -* Newsletter subscriptions (includes email address and IP address used for signup) -* Forum accounts (includes username, email address, posts, and IP addresses) -* Crash logs (if submitted with a contact email address) -* Email and written correspondence (subject to the conditions in the section above) -* Support requests (subject to the conditions in the section above) -* License and receipt emails (subject to the conditions in the section above) -* A purchase or quote billing name, address, email address, and IP address (subject to the conditions in the section above) -* Personal data from website logs - -You can request the erasure of any or all of the above information. We do not retain any other personal information. - -SUBMITTING AN ERASURE REQUEST - -To submit an Erasure Request under Article 17 of the GDPR please send an email titled "GDPR Erasure Request" to our support email address. This request must contain the following information to allow us to locate your personal data, confirm your identity, and identify the scope of the request: - -* Identify the information you wish erased, for example "all personal data associated with me", "only data related to my store purchase", "only support requests I have submitted", etc. -* Include the email address/es associated with your data (e.g. the billing email address used when making a purchase, the email address used when signing up to our newsletter, the email address used when creating a forum account, etc.). We may send an email to these addresses to verify you as the owner. If these email addresses are no longer active we may request additional verification. -* Include the transaction/receipt number and/or serial number of any purchases. - -Please note that the erasure of personal data relating to a product license (such as the license name or email address) will result in that license ceasing to function. Any software products using that license will be unregistered. This cannot be reverted. - -TIMEFRAME - -We will reply to all erasure requests within 30 days. If further information or correspondence is required to confirm your identity the erasure request will be processed within 30-days of confirmation. Otherwise all requests will be processed within 30 days of the initial request. - -When an erasure request is processed, all personal data it applies to is immediately and permanently deleted from our production systems. However please note that some of this data may temporarily persist in archived backup data for an additional period of time. For more information please refer to Backups section of our Privacy Policy. - -Refund Policy - -In certain instances you may be eligible for a refund if desired. We hope our products exceed your expectations, but in the rare instance they don't please read this document to ensure you understand our Refund Policy. - -As SparkLabs is based in NSW, Australia, we comply with the refund regulations of the NSW Fair Trading Act. Refund requests must be received in writing. Refunds will be given at the discretion of SparkLabs management. - -Our products can be downloaded and used at no cost for a trial-period of 30-days. The products are fully functional during this time and not restricted in any way. This provides the ability to completely test out the product to ensure it is fit for purpose and understand the job the product is designed to do. Hence: - -* Refunds will not be given if a customer changes their mind. 
-* Refunds will not be given if the customer believed the product performed a different job than it actually does. -* Refunds will not be given if we have incurred significant support costs. - -We do understand that mistakes can be made, and generally we will try to refund a purchase regardless of the reason. However we do ask that customers make use of the 30-day trial period, and we reserve the right to adhere to the refund rules specified above. - -If a refund is given, the license serial/key for the purchase will be disabled. diff --git a/docs/mirror.rst b/docs/mirror.rst deleted file mode 100644 index 49e752daad..0000000000 --- a/docs/mirror.rst +++ /dev/null @@ -1,234 +0,0 @@ -.. sectnum:: - :suffix: . - -Data mirroring -############## - -This is a draft specification of the data mirroring facility in Azul. The -facility is currently under construction in an effort to make all public data in -the Human Cell Atlas [1]_ available in AWS S3, under the auspices of the Open -Data Sponsorship Program [2]_. This specification may not be fully implemented -at this time and is subject to change as the implementation progresses. - -.. [1] https://www.humancellatlas.org/ -.. [2] https://aws.amazon.com/opendata/open-data-sponsorship-program/ - - -Mirror bucket layout -==================== - -A mirror bucket is an AWS S3 bucket. The bucket layout employs content-based -addressing in order to allow for efficient mirroring and to avoid redundantly -storing duplicate files. The bucket contains three types of objects: file -objects, alias objects and info objects. - - -File objects -++++++++++++ - -A file object holds a file's content, a sequence of bytes. There is one file -object per unique sequence of bytes. If two files have the same content, there -is only one file object in the mirror, representing both. The key of a file -object is ``file/${digest_value}.${digest_type}`` where ``digest_value`` is the -hexadecimal form of a hash of the file object's content and ``digest_type`` is -one of ``sha1``, ``md5`` or ``sha256``, denoting the type of algorithm used to -derive that hash. Henceforth we'll be referring to the pair of ``digest_type`` -and ``digest_value`` as *digest*. - - -Alias objects -+++++++++++++ - -Alias objects are used to make a file object accessible under hash algorithms -other than the algorithm specified in the file object's key. The key of an alias -object is ``alias/${digest_value}.${digest_type}.json`` where ``digest_value`` -is the hexadecimal form of a hash of a file object's content and ``digest_type`` -is one of ``sha1``, ``md5`` or ``sha256``, denoting the type of algorithm used -to derive that hash. The content of an alias object is JSON of the form -``{"$schema":"…", "digest_value":…, "digest_type":…}`` where ``digest_value`` -and ``digest_type`` represent the digest to be used when composing the aliased -file object's key. - -The ``$schema`` property facilitates future changes to the format of alias -objects. For details see the `Schemas`_ section below. - - -Info objects -++++++++++++ - -Info objects contain JSON further describing a file. The key of an info object -is ``info/${digest_value}.${digest_type}.json`` where ``digest_value`` is the -hexadecimal form of a hash of the corresponding file object's content and -``digest_type`` is one of ``sha1``, ``md5`` or ``sha256``, denoting the type of -algorithm used to derive that hash. The content of an ``info`` object is JSON of -the form ``{"$schema":"…", "content-type":…}``.
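-
-To illustrate the key composition described above, here is a minimal sketch in
-Python. It is not part of the implementation; the helper names and the example
-content are made up for this illustration::
-
-    import hashlib
-
-    def file_key(digest_value: str, digest_type: str) -> str:
-        return f'file/{digest_value}.{digest_type}'
-
-    def alias_key(digest_value: str, digest_type: str) -> str:
-        return f'alias/{digest_value}.{digest_type}.json'
-
-    def info_key(digest_value: str, digest_type: str) -> str:
-        return f'info/{digest_value}.{digest_type}.json'
-
-    # The file object key for a file with the content b'hello world'
-    digest_value = hashlib.sha256(b'hello world').hexdigest()
-    print(file_key(digest_value, 'sha256'))
-    # file/b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9.sha256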
- -The ``content-type`` property contains the content type of the file, as defined -for the HTTP response header of the same name [4]_. - -.. [4] https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Content-Type - -The ``$schema`` property facilitates future changes to the format of info -objects. For details see the `Schemas`_ section below. - - -Schemas -======= - -The ``$schema`` property of alias and info objects contains, and always will -contain, the ``http://`` or ``https://`` URL of a JSON schema [3]_ that the -alias and info objects' JSON content conforms to. The last path component of the -schema URL is, and will always be, of the form ``v${schema_version}.json`` where -``schema_version`` is a monotonically increasing integer. - -The contents of a schema at a given URL may change without a change to the URL, -but only in backwards compatible ways, e.g. by adding a new property. Backwards -incompatible schema changes will lead to an increment in the version. -Programmatic consumers of alias and info objects should check the version number -encoded in the schema URL stored in the object prior to consuming the rest of -the object and should not attempt to consume the remainder of an object with an -unexpected schema version. - -Other parts of a schema URL may change without notice. Consumers of alias and -info objects should not make any assumptions about those parts. Consumers may -only assume that a request to the URL yields a valid JSON schema, that the last -path component encodes the schema version and that different schema versions are -incompatible with each other. - -.. [3] https://json-schema.org/ - - -Constraints and invariants -========================== - -The digest stored in an alias object is always different to the digest encoded -in its key. In other words, there are no redundant aliases. - -If there is an alias object, the aliased file object is guaranteed to exist. - -If there is an info object for a given digest, then there is also a file object -for that same digest. However, if there is a file object for a given digest, -there *typically* is an info object for that digest. In the uncommon and -temporary situation that there isn't, the client should retry checking both the -file object and the info object at a later time, at which point both will either -exist or not exist. Alternatively, clients can avoid this situation by always -checking for the info object first. - - -File retrieval procedure -======================== - -A file can be retrieved from the mirror using the S3 REST API, given a certain -digest, i.e., content hash of the file. There is only a limited set of digest -types through which a file is accessible in the mirror: at most it will be -``sha256``, ``sha1`` and ``md5``, but at least one of those. One of these digest -types, the *primary* one, is used in the key of the file object, and there may -or may not be alias objects for the other two. - -Digests of a file can be looked up in the Azul REST API, using the file's name -or a combination of other metadata properties associated with the file. The Azul -response indicates a file's primary type of digest. If the mirror doesn't -contain a file object for the primary digest returned by Azul, it won't contain -aliases for other digests returned by Azul either, but if Azul returns a primary -digest for a file, the mirror will eventually include aliases for every other -digest returned by Azul for that file.
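-
-The retrieval procedures specified in the remainder of this section can be
-summarized in code. The following is a minimal, illustrative sketch of the
-simpler of the two, assuming ``boto3`` and a hypothetical bucket name; the
-step-by-step descriptions below are normative::
-
-    import json
-    from typing import Optional
-
-    import boto3
-    from botocore.exceptions import ClientError
-
-    s3 = boto3.client('s3')
-    bucket = 'example-mirror-bucket'  # hypothetical name
-
-    def get_file(digest_value: str, digest_type: str) -> Optional[bytes]:
-        # Step 1: try the file object directly
-        key = f'file/{digest_value}.{digest_type}'
-        try:
-            return s3.get_object(Bucket=bucket, Key=key)['Body'].read()
-        except ClientError as e:
-            if e.response['Error']['Code'] != 'NoSuchKey':
-                raise
-        # Step 2: try an alias
-        key = f'alias/{digest_value}.{digest_type}.json'
-        try:
-            response = s3.get_object(Bucket=bucket, Key=key)
-        except ClientError as e:
-            if e.response['Error']['Code'] == 'NoSuchKey':
-                return None  # not mirrored under this type of digest
-            raise
-        alias = json.loads(response['Body'].read())
-        # Step 3: the aliased file object is guaranteed to exist
-        key = f'file/{alias["digest_value"]}.{alias["digest_type"]}'
-        return s3.get_object(Bucket=bucket, Key=key)['Body'].read()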
- -There are two retrieval procedures, depending on whether the content type of the -file is desired, and on whether the digest is guaranteed to be correct. - - -Retrieval of just the file content -++++++++++++++++++++++++++++++++++ - -This method is slightly simpler than the one described in the next section but -it should only be used if the file's content type is not needed, and if it is -acceptable that, in rare circumstances, the file's actual content doesn't match -the digest used in the file object's key or in the key of one of its aliases. - -Step 1: Try the file object ---------------------------- - -Using the digest, compose the key of the file object. Attempt to retrieve the -file object. If the digest originated from Azul and Azul denoted it as primary, -the file object will exist. If the file object does not exist, continue with -step 2. This can happen if the digest originated from another source or if it is -unknown whether the digest is the primary one. - -Step 2: Try an alias -------------------- - -Using the digest, compose the key of an alias object. This is the key used in -step 1 but with ``alias/`` at the beginning, instead of ``file/``. Attempt to -retrieve the alias object. If the alias object exists, proceed to step 3. If the -alias object doesn't exist, the mirror doesn't include the file, at least not -under the given type of digest. - -Step 3: Retrieve the file object -------------------------------- - -Using the digest extracted from the alias object's JSON content, compose the key -of the file object. Retrieve the file object (it will exist). - - -Retrieval of file content and content type -++++++++++++++++++++++++++++++++++++++++++ - -This method is slightly more involved than the one described in the previous -section but it yields a file's content type in addition to the content itself, -and it guarantees that the digests used in the file and alias objects' keys -match that content. It is the recommended retrieval procedure. - -Step 1: Try the info object --------------------------- - -Using the digest, compose the key of the info object. Attempt to retrieve the -info object. If the info object exists, extract the ``content-type`` property -from the info object's JSON content and proceed to step 4. If the info object -does not exist, continue with step 2. - -Step 2: Try an alias -------------------- - -Using the digest, compose the key of an alias object. This is the key used in -step 1 but with ``alias/`` at the beginning, instead of ``info/``. Retrieve the -alias object. If the alias object exists, proceed to step 3. If the alias object -doesn't exist, the mirror doesn't include the file, at least not under the given -type of digest. - -Step 3: Retrieve the info object -------------------------------- - -Compose the key of the info object using the digest extracted from the alias -object's JSON content. Retrieve the info object (it will exist), extract the -``content-type`` property from its JSON content and proceed to step 4. - -Step 4: Retrieve the file object -------------------------------- - -Using the current digest, i.e. the one used in step 1 or step 3, compose the key -of the file object. Retrieve the file object (it will exist). - - -Rationale -========= - -How does the specified layout represent the original names of the files stored in -the bucket? It doesn't. Because the mirror bucket is content-addressed, the same -file object could be associated with multiple names.
File names are metadata -that can be easily retrieved from Azul, a REST webservice for querying an index -of rich metadata describing file objects, including their name, format and -provenance. Azul also provides a convenient way to retrieve the signed URL of -both the original copy of a file in an upstream data repository, as well as that -of the copy in a mirror bucket. The signed URLs minted by Azul encode the name -of the file, so that common user agents such as a web browser, or utilities like -``curl`` or ``wget`` will save a downloaded file under its original name. - -Another question might be why the layout doesn't associate the content type -directly with the S3 object. After all, S3 has a mechanism for associating -arbitrary response headers directly with an object. The reason we don't make use -of that feature is that we want the mirror bucket layout to be highly portable. -This makes it possible to replicate the mirror bucket to virtually any file -system or cloud storage service. While this design decision complicates access -to files in the mirror bucket, we believe we've addressed those complications by -offering the Azul endpoint for minting signed URLs mentioned above. The signed -URLs encode both the content type and the name of a file. diff --git a/lambdas/Makefile b/lambdas/Makefile deleted file mode 100644 index cf78d594d6..0000000000 --- a/lambdas/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -.PHONY: all -all: layer indexer service - -include ../common.mk - -.PHONY: layer -layer: check_env - $(MAKE) -C layer layer - -.PHONY: indexer -indexer: check_env - $(MAKE) -C indexer package - -.PHONY: service -service: check_env - $(MAKE) -C service package - -.PHONY: clean -clean: check_env - for d in indexer service layer; do $(MAKE) -C $$d clean || ! break; done - -.PHONY: openapi -openapi: check_env - for d in indexer service; do $(MAKE) -C $$d openapi || !
break; done diff --git a/lambdas/indexer/.chalice/config.json.template.py b/lambdas/indexer/.chalice/config.json.template.py deleted file mode 100644 index 0217e66d7c..0000000000 --- a/lambdas/indexer/.chalice/config.json.template.py +++ /dev/null @@ -1,88 +0,0 @@ -from azul import ( - config, -) -from azul.modules import ( - load_app_module, -) -from azul.template import ( - emit, -) -from azul.terraform import ( - chalice, -) - -suffix = '-' + config.deployment_stage -assert config.indexer_name.endswith(suffix) - -app_name = 'indexer' - -indexer = load_app_module(app_name) - -emit({ - 'version': '2.0', - 'app_name': config.indexer_name[:-len(suffix)], # Chalice appends stage name implicitly - 'api_gateway_stage': config.deployment_stage, - 'manage_iam_role': False, - 'iam_role_arn': '${aws_iam_role.%s.arn}' % app_name, - 'environment_variables': config.lambda_env, - 'lambda_timeout': config.api_gateway_lambda_timeout, - 'lambda_memory_size': 128, - 'stages': { - config.deployment_stage: { - **chalice.private_api_stage_config(app_name), - 'lambda_functions': { - 'api_handler': chalice.vpc_lambda_config(app_name), - indexer.contribute.name: { - 'reserved_concurrency': config.contribution_concurrency(retry=False), - 'lambda_memory_size': 256, - 'lambda_timeout': config.contribution_lambda_timeout(retry=False), - **chalice.vpc_lambda_config(app_name) - }, - indexer.contribute_retry.name: { - 'reserved_concurrency': config.contribution_concurrency(retry=True), - 'lambda_memory_size': 4096, # FIXME https://github.com/DataBiosphere/azul/issues/2902 - 'lambda_timeout': config.contribution_lambda_timeout(retry=True), - **chalice.vpc_lambda_config(app_name) - }, - indexer.aggregate.name: { - 'reserved_concurrency': config.aggregation_concurrency(retry=False), - 'lambda_memory_size': 256, - 'lambda_timeout': config.aggregation_lambda_timeout(retry=False), - **chalice.vpc_lambda_config(app_name) - }, - indexer.aggregate_retry.name: { - 'reserved_concurrency': config.aggregation_concurrency(retry=True), - 'lambda_memory_size': 6500, - 'lambda_timeout': config.aggregation_lambda_timeout(retry=True), - **chalice.vpc_lambda_config(app_name) - }, - **( - { - indexer.forward_alb_logs.name: chalice.vpc_lambda_config(app_name), - indexer.forward_s3_logs.name: chalice.vpc_lambda_config(app_name), - } - if config.enable_log_forwarding else - {} - ), - **( - { - indexer.mirror.name: { - 'reserved_concurrency': config.mirroring_concurrency, - 'lambda_memory_size': 512, - 'lambda_timeout': config.mirror_lambda_timeout - # No VPC for this function so as to avoid paying for - # NAT Gateway traffic - }, - } - if config.enable_mirroring else - {} - ), - indexer.update_health_cache.name: { - 'lambda_memory_size': 128, - 'lambda_timeout': config.health_cache_lambda_timeout, - **chalice.vpc_lambda_config(app_name) - } - } - } - } -}) diff --git a/lambdas/indexer/.chalice/deployed/.gitkeep b/lambdas/indexer/.chalice/deployed/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/lambdas/indexer/.gitignore b/lambdas/indexer/.gitignore deleted file mode 100644 index e19601411e..0000000000 --- a/lambdas/indexer/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -/.chalice/* -!/.chalice/*.template.py -/lambda-policy.json -/vendor/* -!/vendor/azul -!/vendor/humancellatlas -!/vendor/resources/static -!/vendor/resources/*.template.py diff --git a/lambdas/indexer/Makefile b/lambdas/indexer/Makefile deleted file mode 100644 index bd674c8176..0000000000 --- a/lambdas/indexer/Makefile +++ /dev/null @@ -1,4 +0,0 @@ 
-.PHONY: all -all: package - -include ../lambdas.mk diff --git a/lambdas/indexer/app.py b/lambdas/indexer/app.py deleted file mode 100644 index 10ff2ec656..0000000000 --- a/lambdas/indexer/app.py +++ /dev/null @@ -1,129 +0,0 @@ -import logging -from typing import ( - Optional, -) - -import chalice - -from azul import ( - JSON, - cached_property, - config, -) -from azul.chalice import ( - LambdaMetric, -) -from azul.deployment import ( - aws, -) -from azul.health import ( - HealthApp, -) -from azul.hmac import ( - HMACAuthentication, - SignatureHelper, -) -from azul.indexer.index_controller import ( - IndexController, -) -from azul.indexer.log_forwarding_controller import ( - LogForwardingController, -) -from azul.indexer.mirror_controller import ( - MirrorController, -) -from azul.logging import ( - configure_app_logging, -) -from azul.openapi import ( - format_description as fd, -) -from azul.types import ( - not_none, -) - -log = logging.getLogger(__name__) - -spec: JSON = { - 'openapi': '3.0.1', - 'info': { - 'title': config.indexer_name, - # The version property should be updated in any PR connected to an issue - # labeled `API`. Increment the major version for backwards incompatible - # changes and reset the minor version to zero. Otherwise, increment only - # the minor version for backwards compatible changes. A backwards - # compatible change is one that does not require updates to clients. - 'version': '3.3', - 'description': fd(''' - This is the internal API for Azul's indexer component. - ''') - } -} - - -class IndexerApp(HealthApp, SignatureHelper): - - @cached_property - def index_controller(self) -> IndexController: - return IndexController(app=self) - - @cached_property - def mirror_controller(self) -> MirrorController: - return MirrorController(app=self) - - @cached_property - def log_controller(self) -> LogForwardingController: - return LogForwardingController(app=self) - - def __init__(self): - super().__init__(app_name=config.indexer_name, - globals=globals(), - spec=spec) - - def log_forwarder(self, prefix: str): - if config.enable_log_forwarding: - s3_decorator = self.on_s3_event(bucket=aws.logs_bucket, - events=['s3:ObjectCreated:*'], - prefix=prefix) - error_decorator = self.metric_alarm(metric=LambdaMetric.errors, - threshold=1, # One alarm … - period=24 * 60 * 60) # … per day. 
- throttle_decorator = self.metric_alarm(metric=LambdaMetric.throttles, - threshold=0, - period=5 * 60) - retry_decorator = self.retry(num_retries=2) - - def decorator(f): - return retry_decorator(throttle_decorator(error_decorator(s3_decorator(f)))) - - return decorator - else: - return lambda func: func - - def _authenticate(self) -> Optional[HMACAuthentication]: - return self.auth_from_request(not_none(self.current_request)) - - -app = IndexerApp() -configure_app_logging(app, log) - -globals().update(app.default_routes()) - -globals().update(app.index_controller.handlers()) - - -@app.log_forwarder( - config.alb_access_log_path_prefix(deployment=None) -) -def forward_alb_logs(event: chalice.app.S3Event): - app.log_controller.forward_alb_logs(event) - - -@app.log_forwarder( - config.s3_access_log_path_prefix(deployment=None) -) -def forward_s3_logs(event: chalice.app.S3Event): - app.log_controller.forward_s3_access_logs(event) - - -globals().update(app.mirror_controller.handlers()) diff --git a/lambdas/indexer/openapi.json b/lambdas/indexer/openapi.json deleted file mode 100644 index 73c8898443..0000000000 --- a/lambdas/indexer/openapi.json +++ /dev/null @@ -1,908 +0,0 @@ -{ - "openapi": "3.0.1", - "info": { - "title": "azul-indexer-dev", - "version": "3.3", - "description": "\nThis is the internal API for Azul's indexer component.\n\n\n## Contact us\n\nFor technical support please file an issue at\n[GitHub](https://github.com/DataBiosphere/azul/issues) or email\n`azul-group@ucsc.edu`. To report a security concern or misconduct please email\n`azul-group@ucsc.edu`.\n" - }, - "paths": { - "/": { - "get": { - "summary": "Redirect to the Swagger UI for interactive use of this REST API", - "tags": [ - "Auxiliary" - ], - "responses": { - "301": { - "description": "A redirect to the Swagger UI" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/swagger/index.html": { - "get": { - "summary": "The Swagger UI for interactive use of this REST API", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "The response body is an HTML page containing the Swagger UI" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/swagger/swagger-initializer.js": { - "get": { - "summary": "Used internally by the Swagger UI", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "The response body is JavaScript used internally by the Swagger UI" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/swagger/{file}": { - "parameters": [ - { - "name": "file", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "The name of a static file to be returned" - } - ], - "get": { - "summary": "Static files needed for the Swagger UI", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "The response body is the contents of the requested file" - }, - "404": { - "description": "The requested file does not exist" - }, - "504": { - "description": "\nRequest timed out. 
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/openapi.json": { - "get": { - "summary": "Return OpenAPI specifications for this REST API", - "description": "\nThis endpoint returns the [OpenAPI specifications]\n(https://github.com/OAI/OpenAPI-Specification) for this REST\nAPI. These are the specifications used to generate the page\nyou are visiting now.\n", - "responses": { - "200": { - "description": "200 response", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "openapi": { - "type": "string" - }, - "info": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "tags": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "servers": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "paths": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "components": { - "type": "object", - "properties": {}, - "additionalProperties": false - } - }, - "required": [ - "openapi", - "info", - "tags", - "servers", - "paths", - "components" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/version": { - "get": { - "summary": "Describe current version of this REST API", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "Version endpoint is reachable.", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "git": { - "type": "object", - "properties": { - "commit": { - "type": "string" - }, - "dirty": { - "type": "boolean" - } - }, - "required": [ - "commit", - "dirty" - ], - "additionalProperties": false - } - }, - "required": [ - "git" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/robots.txt": { - "get": { - "summary": "Robots Exclusion Protocol", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "\nThe robots.txt resource according to\n[RFC9309](https://datatracker.ietf.org/doc/html/rfc9309)\n" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/health": { - "get": { - "summary": "Complete health check", - "description": "\nHealth check of the indexer REST API and all\nresources it depends on. This may take a long time to complete\nand exerts considerable load on the API. 
For that reason it\nshould not be requested frequently or by automated\nmonitoring facilities that would be better served by the\n[`/health/fast`](#operations-Auxiliary-get_health_fast) or\n[`/health/cached`](#operations-Auxiliary-get_health_cached)\nendpoints.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/basic": { - "get": { - "summary": "Basic health check", - "description": "\nHealth check of only the REST API itself, excluding other\nresources that it depends on. 
A 200 response indicates that\nthe indexer is reachable via HTTP(S) but nothing\nmore.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/cached": { - "get": { - "summary": "Cached health check for continuous monitoring", - "description": "\nReturn a cached copy of the\n[`/health/fast`](#operations-Auxiliary-get_health_fast)\nresponse. This endpoint is optimized for continuously\nrunning, distributed health monitors such as Route 53 health\nchecks. The cache ensures that the indexer is not\noverloaded by these types of health monitors. 
The cache is\nupdated every minute.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "queues": {}, - "progress": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "queues": {}, - "progress": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/fast": { - "get": { - "summary": "Fast health check", - "description": "\nPerformance-optimized health check of the REST API and other\ncritical resources that it depends on. 
This endpoint can be\nrequested more frequently than\n[`/health`](#operations-Auxiliary-get_health) but\nperiodically scheduled, automated requests should be made to\n[`/health/cached`](#operations-Auxiliary-get_health_cached).\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "queues": {}, - "progress": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "queues": {}, - "progress": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/{keys}": { - "parameters": [ - { - "name": "keys", - "in": "path", - "required": true, - "schema": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "api_endpoints", - "elasticsearch", - "other_lambdas", - "progress", - "queues" - ] - } - }, - "description": "\nA comma-separated list of keys selecting the health\nchecks to be performed. Each key corresponds to an\nentry in the response.\n" - } - ], - "get": { - "summary": "Selective health check", - "description": "\nThis endpoint allows clients to request a health check on a\nspecific set of resources. 
Each resource is identified by a\n*key*, the same key under which the resource appears in a\n[`/health`](#operations-Auxiliary-get_health) response.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/{catalog}/{action}": { - "post": { - "tags": [ - "Indexing" - ], - "summary": "Notify the indexer to perform an action on a bundle", - "description": "\nQueue a bundle for addition to or deletion from the index.\n\nThe request must be authenticated using HMAC via the ``signature``\nheader. Each Azul deployment has its own unique HMAC key. 
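A minimal signing sketch is shown below; the components of the signed message are spelled out immediately after it. The canonicalization used here (joining with newlines, hex-encoding the digest) and the choice of HMAC-SHA256 are assumptions and would have to match what the service verifies.

```
import hashlib
import hmac

def sign(key: bytes, method: str, path: str, body: bytes) -> str:
    # Assumed canonical message: the method, the path and the hex-encoded
    # SHA256 digest of the body, joined by newlines, signed with HMAC-SHA256.
    digest = hashlib.sha256(body).hexdigest()
    message = '\n'.join([method, path, digest]).encode()
    return hmac.new(key, message, hashlib.sha256).hexdigest()

# The resulting value would be sent in the `signature` request header.
```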
The HMAC\ncomponents are the request method, request path, and the SHA256\ndigest of the request body.\n\nA valid HMAC header proves that the client is in possession of the\nsecret HMAC key and that the request wasn't tampered with while\ntravelling between client and service, even though the latter is not\nstrictly necessary considering that TLS is used to encrypt the\nentire exchange. Internal clients can obtain the secret key from the\nenvironment they are running in and share with the\nservice. External clients must have been given the secret key. The\nnow-defunct DSS was such an external client. The Azul indexer\nprovided the HMAC secret to DSS when it registered with DSS to be\nnotified about bundle additions/deletions. These days only internal\nclients use this endpoint.\n", - "requestBody": { - "description": "Contents of the notification", - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "bundle_fqid": { - "type": "object", - "properties": { - "uuid": { - "type": "string" - }, - "version": { - "type": "string" - }, - "source": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "spec": { - "type": "string" - } - }, - "required": [ - "id", - "spec" - ], - "additionalProperties": false - } - }, - "required": [ - "uuid", - "version", - "source" - ], - "additionalProperties": false - } - }, - "required": [ - "bundle_fqid" - ], - "additionalProperties": false - } - } - } - }, - "parameters": [ - { - "name": "catalog", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ] - }, - "description": "The name of the catalog to notify." - }, - { - "name": "action", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": [ - "add", - "delete" - ] - }, - "description": "Which action to perform." - }, - { - "name": "signature", - "in": "header", - "required": true, - "schema": { - "type": "string" - }, - "description": "HMAC authentication signature." - } - ], - "responses": { - "200": { - "description": "Notification was successfully queued for processing" - }, - "400": { - "description": "Request was rejected due to malformed parameters" - }, - "401": { - "description": "Request lacked a valid HMAC header" - }, - "504": { - "description": "\nRequest timed out.
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/schemas/{facility}/{schema_name}/{version_and_extension}": { - "get": { - "summary": "Retrieve JSON schemas", - "tags": [ - "Auxiliary" - ], - "parameters": [ - { - "name": "facility", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "schema_name", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "version_and_extension", - "in": "path", - "required": true, - "schema": { - "type": "string", - "pattern": "v\\d+\\.json" - } - } - ], - "description": "\n[JSON Schemas](https://json-schema.org/docs) for various Azul facilities.\n", - "responses": { - "200": { - "description": "Contents of the schema", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "schema": { - "type": "string" - }, - "id": { - "type": "string" - }, - "type": { - "type": "string" - } - }, - "required": [ - "schema", - "id", - "type" - ], - "additionalProperties": true - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - } - }, - "tags": [], - "servers": [ - { - "url": "http://localhost/" - } - ] -} \ No newline at end of file diff --git a/lambdas/indexer/vendor/azul b/lambdas/indexer/vendor/azul deleted file mode 120000 index b3310b2b5d..0000000000 --- a/lambdas/indexer/vendor/azul +++ /dev/null @@ -1 +0,0 @@ -../../../src/azul \ No newline at end of file diff --git a/lambdas/indexer/vendor/humancellatlas b/lambdas/indexer/vendor/humancellatlas deleted file mode 120000 index bf4bcf48ff..0000000000 --- a/lambdas/indexer/vendor/humancellatlas +++ /dev/null @@ -1 +0,0 @@ -../../../src/humancellatlas \ No newline at end of file diff --git a/lambdas/indexer/vendor/resources/environ.json.template.py b/lambdas/indexer/vendor/resources/environ.json.template.py deleted file mode 100644 index f77298a3aa..0000000000 --- a/lambdas/indexer/vendor/resources/environ.json.template.py +++ /dev/null @@ -1,8 +0,0 @@ -from azul import ( - config, -) -from azul.template import ( - emit, -) - -emit(config.lambda_env_for_outsourcing) diff --git a/lambdas/indexer/vendor/resources/static/schemas b/lambdas/indexer/vendor/resources/static/schemas deleted file mode 120000 index fd8289d0d9..0000000000 --- a/lambdas/indexer/vendor/resources/static/schemas +++ /dev/null @@ -1 +0,0 @@ -../../../../../schemas \ No newline at end of file diff --git a/lambdas/indexer/vendor/resources/static/swagger b/lambdas/indexer/vendor/resources/static/swagger deleted file mode 120000 index 7c782ec5ff..0000000000 --- a/lambdas/indexer/vendor/resources/static/swagger +++ /dev/null @@ -1 +0,0 @@ -../../../../../swagger/ \ No newline at end of file diff --git a/lambdas/lambdas.mk b/lambdas/lambdas.mk deleted file mode 100644 index 84f4816385..0000000000 --- a/lambdas/lambdas.mk +++ /dev/null @@ -1,60 +0,0 @@ -# Relative paths are based on the CWD, not the directory containing this file. -# project_root is not defined if the user forgot to source environment. This -# solution is based on comments in https://stackoverflow.com/questions/322936. -include $(abspath $(dir $(lastword $(MAKEFILE_LIST))))/../common.mk - -# The compile target is used during packaging of lambdas. 
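In standard-library terms, the recipe below amounts roughly to the following sketch; the directory path is an assumption, and PYTHONHASHSEED=0 must already be set in the environment before the interpreter starts.

```
import compileall
import py_compile

# Force recompilation with hash-based invalidation so that the resulting
# .pyc files do not depend on source timestamps, keeping the deployment
# package deterministic.
compileall.compile_dir(
    'vendor',
    force=True,
    quiet=1,
    invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
)
```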
The target ensures -# that a .pyc file is present for every .py file in the package. -# -# One reason to compile before deploying is to reduce lambda start-up time. But -# more importantly, it ensures that the same files are always included in the -# Chalice deployment package. Having a consistent, deterministic deployment -# package allows Terraform to use the hash of the deployment package to easily -# decide if anything new is being deployed, and skip updating the lambdas -# otherwise. -# -# By default, Python embeds the modify timestamp of the source file into the -# .pyc and uses this to determine when to re-compile, but since Gitlab clones -# the repository each time it deploys, fresh timestamps prevented the deployment -# package from being deterministic. With the `--invalidation-mode checked-hash` -# option, Python embeds the hash of the source file in the .pyc instead -# of the timestamp, which is consistent regardless of when the files were -# downloaded. -# -# The `-f` option forces recompilation. This is necessary because timestamp -# style .pycs may have already been created when other deployment scripts are -# run, and we need to overwrite them. -# -# Set literals will compile in a non-deterministic order unless PYTHONHASHSEED -# is set. For a full explanation see http://benno.id.au/blog/2013/01/15/python-determinism -# -# `compileall` ignores symlinks to directories during traversal, so we must -# explicitly list them as arguments to ensure all files in vendor/ are -# deterministically compiled. -# -.PHONY: compile -compile: check_python - PYTHONHASHSEED=0 python -m compileall \ - -f -q --invalidation-mode checked-hash \ - vendor $(shell find -L $$(find vendor -maxdepth 1 -type l) -maxdepth 0 -type d) - -.PHONY: config -config: .chalice/config.json - -.PHONY: environ -environ: vendor/resources/environ.json - -.PHONY: local -local: check_python config - chalice local - -.PHONY: clean -clean: git_clean_recursive - -.PHONY: package -package: check_branch check_python check_aws config environ compile - chalice package --stage $(AZUL_DEPLOYMENT_STAGE) --pkg-format terraform .chalice/terraform - -.PHONY: openapi -openapi: check_python - python $(project_root)/scripts/generate_openapi_document.py diff --git a/lambdas/layer/.chalice/config.json.template.py b/lambdas/layer/.chalice/config.json.template.py deleted file mode 100644 index feb9d0db1e..0000000000 --- a/lambdas/layer/.chalice/config.json.template.py +++ /dev/null @@ -1,14 +0,0 @@ -from azul import ( - config, -) -from azul.template import ( - emit, -) - -emit({ - "version": "2.0", - "app_name": config.qualified_resource_name("dependencies"), - "api_gateway_stage": config.deployment_stage, - "manage_iam_role": False, - "lambda_memory_size": 128, -}) diff --git a/lambdas/layer/.gitignore b/lambdas/layer/.gitignore deleted file mode 100644 index 9591dc1ed3..0000000000 --- a/lambdas/layer/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.chalice/* -!/.chalice/config.json.template.py diff --git a/lambdas/layer/Makefile b/lambdas/layer/Makefile deleted file mode 100644 index ed5c9055ae..0000000000 --- a/lambdas/layer/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -.PHONY: all -all: layer - -include ../lambdas.mk - -.PHONY: layer -layer: check_branch check_python check_aws config - python $(project_root)/scripts/stage_layer.py diff --git a/lambdas/layer/app.py b/lambdas/layer/app.py deleted file mode 100644 index f9865c4d82..0000000000 --- a/lambdas/layer/app.py +++ /dev/null @@ -1,18 +0,0 @@ -from azul import ( - config, -) -from
azul.chalice import ( - AzulChaliceApp, -) - -# This whole file only exists so that we can use Chalice to create the layer -# package and is removed from the final result. - -app = AzulChaliceApp(app_name=config.qualified_resource_name('dependencies'), - globals=globals(), - spec={}) - - -@app.route('/', spec={}) -def foo(): - pass diff --git a/lambdas/layer/requirements.trans.txt b/lambdas/layer/requirements.trans.txt deleted file mode 120000 index a45a5d5f05..0000000000 --- a/lambdas/layer/requirements.trans.txt +++ /dev/null @@ -1 +0,0 @@ -../../requirements.trans.txt \ No newline at end of file diff --git a/lambdas/layer/requirements.txt b/lambdas/layer/requirements.txt deleted file mode 120000 index fd1efae711..0000000000 --- a/lambdas/layer/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../../requirements.txt \ No newline at end of file diff --git a/lambdas/service/.chalice/config.json.template.py b/lambdas/service/.chalice/config.json.template.py deleted file mode 100644 index c40dff9442..0000000000 --- a/lambdas/service/.chalice/config.json.template.py +++ /dev/null @@ -1,46 +0,0 @@ -from azul import ( - config, -) -from azul.modules import ( - load_app_module, -) -from azul.template import ( - emit, -) -from azul.terraform import ( - chalice, -) - -suffix = '-' + config.deployment_stage -assert config.service_name.endswith(suffix) - -app_name = 'service' - -service = load_app_module(app_name) - -emit({ - "version": "2.0", - "app_name": config.service_name[:-len(suffix)], # Chalice appends stage name implicitly - "api_gateway_stage": config.deployment_stage, - "manage_iam_role": False, - "iam_role_arn": "${aws_iam_role.%s.arn}" % app_name, - "environment_variables": config.lambda_env, - "lambda_timeout": config.api_gateway_lambda_timeout, - "lambda_memory_size": 2048, - **chalice.vpc_lambda_config(app_name), - "stages": { - config.deployment_stage: { - **chalice.private_api_stage_config(app_name), - "lambda_functions": { - "api_handler": chalice.vpc_lambda_config(app_name), - service.generate_manifest.name: { - "lambda_timeout": config.service_lambda_timeout - }, - service.update_health_cache.name: { - "lambda_memory_size": 128, - "lambda_timeout": config.health_cache_lambda_timeout - } - } - } - } -}) diff --git a/lambdas/service/.chalice/deployed/.gitkeep b/lambdas/service/.chalice/deployed/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/lambdas/service/.gitignore b/lambdas/service/.gitignore deleted file mode 100644 index e19601411e..0000000000 --- a/lambdas/service/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -/.chalice/* -!/.chalice/*.template.py -/lambda-policy.json -/vendor/* -!/vendor/azul -!/vendor/humancellatlas -!/vendor/resources/static -!/vendor/resources/*.template.py diff --git a/lambdas/service/Makefile b/lambdas/service/Makefile deleted file mode 100644 index bd674c8176..0000000000 --- a/lambdas/service/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -.PHONY: all -all: package - -include ../lambdas.mk diff --git a/lambdas/service/app.py b/lambdas/service/app.py deleted file mode 100644 index 3be85bd6c3..0000000000 --- a/lambdas/service/app.py +++ /dev/null @@ -1,1774 +0,0 @@ -import base64 -from collections.abc import ( - Mapping, - Sequence, -) -from functools import ( - partial, -) -import hashlib -from inspect import ( - signature, -) -import json -import logging.config -from typing import ( - Callable, -) -import urllib.parse - -import attr -from chalice import ( - BadRequestError as BRE, - ChaliceViewError, - Response, - UnauthorizedError, -) 
-import chevron -from furl import ( - furl, -) -from more_itertools import ( - one, -) - -from azul import ( - CatalogName, - R, - cache, - cached_property, - config, - drs, - iif, - mutable_furl, - require, -) -from azul.auth import ( - OAuth2, -) -from azul.collections import ( - OrderedSet, -) -from azul.csp import ( - CSP, -) -from azul.drs import ( - AccessMethod, -) -from azul.health import ( - HealthApp, - HealthController, -) -from azul.indexer.document import ( - EntityType, -) -from azul.indexer.field import ( - FieldType, - Nested, -) -from azul.logging import ( - configure_app_logging, -) -from azul.openapi import ( - application_json, - format_description as fd, - params, - responses, - schema, -) -from azul.plugins import ( - ManifestFormat, - MetadataPlugin, - RepositoryPlugin, -) -from azul.plugins.metadata.hca.indexer.transform import ( - value_and_unit, -) -from azul.service.app_controller import ( - validate_catalog, - validate_params, -) -from azul.service.catalog_controller import ( - CatalogController, -) -from azul.service.drs_controller import ( - DRSController, -) -from azul.service.elasticsearch_service import ( - Pagination, -) -from azul.service.manifest_controller import ( - ManifestController, -) -from azul.service.manifest_service import ( - CurlManifestGenerator, -) -from azul.service.repository_controller import ( - RepositoryController, -) -from azul.types import ( - AnyJSON, - JSON, - LambdaContext, - MutableJSON, - PrimitiveJSON, - reify, -) - -log = logging.getLogger(__name__) - -spec = { - 'openapi': '3.0.1', - 'info': { - 'title': config.service_name, - # The version property should be updated in any PR connected to an issue - # labeled `API`. Increment the major version for backwards incompatible - # changes and reset the minor version to zero. Otherwise, increment only - # the minor version for backwards compatible changes. A backwards - # compatible change is one that does not require updates to clients. - 'version': '13.0', - 'description': fd(f''' - # Overview - - Azul is a REST web service for querying metadata associated with - both experimental and analysis data from a data repository. In order - to deliver response times that make it suitable for interactive use - cases, the set of metadata properties that it exposes for sorting, - filtering, and aggregation is limited. Azul provides a uniform view - of the metadata over a range of diverse schemas, effectively - shielding clients from changes in the schemas as they occur over - time. It does so, however, at the expense of detail in the set of - metadata properties it exposes and in the accuracy with which it - aggregates them. - - Azul denormalizes and aggregates metadata into several different - indices for selected entity types. Metadata entities can be queried - using the [Index](#operations-tag-Index) endpoints. - - A set of indices forms a catalog. There is a default catalog called - `{config.default_catalog}` which will be used unless a - different catalog name is specified using the `catalog` query - parameter. Metadata from different catalogs is completely - independent: a response obtained by querying one catalog does not - necessarily correlate to a response obtained by querying another - one. Two catalogs can contain metadata from the same sources or - different sources. It is only guaranteed that the body of a - response by any given endpoint adheres to one schema, - independently of which catalog was specified in the request. 
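For example, a client pinning its queries to a specific catalog only needs to add the `catalog` query parameter. A sketch, in which the hostname is a placeholder and `dcp2` stands in for any available catalog:

```
import json
from urllib.parse import urlencode
from urllib.request import urlopen

base = 'https://service.example.azul.dev'  # hypothetical deployment URL
query = urlencode({'catalog': 'dcp2', 'size': '5'})
with urlopen(f'{base}/index/projects?{query}') as response:
    page = json.load(response)
print([hit['entryId'] for hit in page['hits']])
```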
- - Azul provides the ability to download data and metadata via the - [Manifests](#operations-tag-Manifests) endpoints. The - `{ManifestFormat.curl.value}` format manifests can be used to - download data files. Other formats provide various views of the - metadata. Manifests can be generated for a selection of files using - filters. These filters are interchangeable with the filters used by - the [Index](#operations-tag-Index) endpoints. - - Azul also provides a [summary](#operations-Index-get_index_summary) - view of indexed data. - - ## Data model - - Any index, when queried, returns a JSON array of hits. Each hit - represents a metadata entity. Nested in each hit is a summary of the - properties of entities associated with the hit. An entity is - associated either by a direct edge in the original metadata graph, - or indirectly as a series of edges. The nested properties are - grouped by the type of the associated entity. The properties of all - data files associated with a particular sample, for example, are - listed under `hits[*].files` in a `/index/samples` response. It is - important to note that while each _hit_ represents a discrete - entity, the properties nested within that hit are the result of an - aggregation over potentially many associated entities. - - To illustrate this, consider a data file that is part of two - projects (a project is a group of related experiments, typically by - one laboratory, institution or consortium). Querying the `files` - index for this file yields a hit looking something like: - - ``` - {{ - "projects": [ - {{ - "projectTitle": "Project One", - "laboratory": ..., - ... - }}, - {{ - "projectTitle": "Project Two", - "laboratory": ..., - ... - }} - ], - "files": [ - {{ - "format": "pdf", - "name": "Team description.pdf", - ... - }} - ] - }} - ``` - - This example hit contains two kinds of nested entities (a hit in an - actual Azul response will contain more): There are the two projects - entities, and the file itself. These nested entities contain - selected metadata properties extracted in a consistent way. This - makes filtering and sorting simple. - - Also notice that there is only one file. When querying a particular - index, the corresponding entity will always be a singleton like - this.
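In client code, navigating such a hit is plain dictionary and list access. A sketch reusing the example above, with illustrative values:

```
# One element of the `hits` array in a /index/files response
hit = {
    'projects': [
        {'projectTitle': 'Project One'},
        {'projectTitle': 'Project Two'},
    ],
    'files': [
        {'format': 'pdf', 'name': 'Team description.pdf'},
    ],
}
# In the files index the inner `files` list is always a singleton ...
the_file, = hit['files']
# ... while associated entities may aggregate over many projects.
titles = [project['projectTitle'] for project in hit['projects']]
```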
- ''') - }, - 'tags': [ - { - 'name': 'Index', - 'description': fd(''' - Query the indices for entities of interest - ''') - }, - { - 'name': 'Manifests', - 'description': fd(''' - Complete listing of files matching a given filter in TSV and - other formats - ''') - }, - { - 'name': 'Repository', - 'description': fd(''' - Access to data files in the underlying repository - ''') - }, - { - 'name': 'DSS', - 'description': fd(''' - Access to files maintained in the Data Store - ''') - }, - { - 'name': 'DRS', - 'description': fd(''' - DRS-compliant proxy of the underlying repository - ''') - }, - { - 'name': 'Auxiliary', - 'description': fd(''' - Describes various aspects of the Azul service - ''') - }, - { - 'name': 'Deprecated', - 'description': fd(''' - Endpoints that should not be used and that will be removed - ''') - } - ] -} - - -class ServiceApp(HealthApp): - - def spec(self) -> JSON: - return { - **super().spec(), - **self._oauth2_spec() - } - - def _oauth2_spec(self) -> JSON: - scopes = ('email',) - return { - 'components': { - 'securitySchemes': { - self.app_name: { - 'type': 'oauth2', - 'flows': { - 'implicit': { - 'authorizationUrl': 'https://accounts.google.com/o/oauth2/auth', - 'scopes': {scope: scope for scope in scopes} - } - } - } - } - }, - 'security': [ - {}, - {self.app_name: scopes} - ] - } - - @property - def drs_controller(self) -> DRSController: - return DRSController(app=self, file_url_func=self.file_url) - - @cached_property - def health_controller(self) -> HealthController: - return HealthController(app=self, lambda_name=self.unqualified_app_name) - - @cached_property - def catalog_controller(self) -> CatalogController: - return CatalogController(app=self, file_url_func=self.file_url) - - @cached_property - def repository_controller(self) -> RepositoryController: - return RepositoryController(app=self, file_url_func=self.file_url) - - @cached_property - def manifest_controller(self) -> ManifestController: - return ManifestController(app=self, - file_url_func=self.file_url, - manifest_url_func=self.manifest_url) - - @property - def metadata_plugin(self) -> MetadataPlugin: - return self._metadata_plugin(self.catalog) - - @cache - def _metadata_plugin(self, catalog: CatalogName): - return MetadataPlugin.load(catalog).create() - - @property - def repository_plugin(self) -> RepositoryPlugin: - return self._repository_plugin(self.catalog) - - @cache - def _repository_plugin(self, catalog: CatalogName): - return RepositoryPlugin.load(catalog).create(catalog) - - @property - def fields(self) -> Sequence[str]: - organic, synthetic = self.organic_fields, self.synthetic_fields - all = OrderedSet(organic) - all.update(synthetic) - assert len(all) == len(organic) + len(synthetic) - return tuple(all) - - @property - def organic_fields(self) -> Sequence[str]: - return sorted(self.metadata_plugin.field_mapping.keys()) - - @property - def synthetic_fields(self) -> Sequence[str]: - return self.metadata_plugin.special_fields.accessible, - - def __init__(self): - super().__init__(app_name=config.service_name, - globals=globals(), - spec=spec) - - @attr.s(kw_only=True, auto_attribs=True, frozen=True) - class Pagination(Pagination): - self_url: furl - - def link(self, *, previous: bool, **params: str) -> furl | None: - search_key = self.search_before if previous else self.search_after - if search_key is None: - return None - else: - before_or_after = 'before' if previous else 'after' - params = { - **params, - f'search_{before_or_after}': json.dumps(search_key), - 'sort': self.sort, - 
'order': self.order, - 'size': self.size - } - return furl(url=self.self_url, args=params) - - def get_pagination(self, entity_type: str) -> Pagination: - default_sorting = self.metadata_plugin.exposed_indices[entity_type] - params = self.current_request.query_params or {} - sb, sa = params.get('search_before'), params.get('search_after') - if sb is None: - if sa is not None: - sa = tuple(json.loads(sa)) - else: - if sa is None: - sb = tuple(json.loads(sb)) - else: - raise BRE('Only one of search_after or search_before may be set') - try: - return self.Pagination(order=params.get('order', default_sorting.order), - size=int(params.get('size', '10')), - sort=params.get('sort', default_sorting.field_name), - search_before=sb, - search_after=sa, - self_url=self.self_url) - except AssertionError as e: - if R.caused(e): - raise R.propagate(e, ChaliceViewError) - else: - raise - - def file_url(self, - *, - catalog: CatalogName, - file_uuid: str, - fetch: bool = True, - **params: str - ) -> mutable_furl: - file_uuid = urllib.parse.quote(file_uuid, safe='') - view_function = fetch_repository_files if fetch else repository_files - path = one(view_function.path) - url = self.base_url.add(path=path.format(file_uuid=file_uuid)) - return url.set(args=dict(catalog=catalog, **params)) - - def _authenticate(self) -> OAuth2 | None: - try: - header = self.current_request.headers['Authorization'] - except KeyError: - return None - else: - try: - auth_type, auth_token = header.split() - except ValueError: - raise UnauthorizedError(header) - else: - if auth_type.lower() == 'bearer': - return OAuth2(auth_token) - else: - raise UnauthorizedError(header) - - def manifest_url(self, - *, - fetch: bool, - token_or_key: str | None = None, - **params: str - ) -> mutable_furl: - if token_or_key is None: - handler = fetch_file_manifest if fetch else file_manifest - path = one(handler.path) - else: - handler = fetch_file_manifest_with_token if fetch else file_manifest_with_token - path: str = one(handler.path) - path = path.format(token=token_or_key) - url = self.base_url.add(path=path) - return url.set(args=params) - - -app = ServiceApp() -configure_app_logging(app, log) - -globals().update(app.default_routes()) - - -@app.route( - '/oauth2_redirect', - enabled=config.google_oauth2_client_id is not None, - cache_control='no-store', - interactive=False, - spec={ - 'summary': 'Destination endpoint for Google OAuth 2.0 redirects', - 'tags': ['Auxiliary'], - 'responses': { - '200': { - 'description': fd(''' - The response body is an HTML page with a script that extracts - the access token and redirects back to the Swagger UI.
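On the wire, the `_authenticate` hook above means that an authenticated request simply carries a standard bearer token. A sketch, with a placeholder endpoint and token:

```
from urllib.request import Request, urlopen

access_token = '...'  # an OAuth 2.0 access token obtained elsewhere
request = Request(
    'https://service.example.azul.dev/repository/sources',  # placeholder URL
    headers={'Authorization': f'Bearer {access_token}'},
)
with urlopen(request) as response:  # a malformed header would yield a 401
    ...
```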
- ''') - } - } - } -) -def oauth2_redirect(): - file_name = 'oauth2-redirect.html.template.mustache' - template = app.load_static_resource('swagger', file_name) - nonce = CSP.new_nonce() - html = chevron.render(template, { - 'CSP_NONCE': json.dumps(nonce) - }) - csp = CSP.for_azul(nonce) - return Response(status_code=200, - headers={ - 'Content-Type': 'text/html', - 'Content-Security-Policy': str(csp) - }, - body=html) - - -def validate_repository_search(entity_type: EntityType, - params: Mapping[str, str], - **validators): - validate_params(params, **{ - 'catalog': validate_catalog, - 'filters': validate_filters, - 'order': validate_order, - 'search_after': partial(validate_json_param, 'search_after'), - 'search_after_uid': str, - 'search_before': partial(validate_json_param, 'search_before'), - 'search_before_uid': str, - 'size': partial(validate_size, entity_type), - 'sort': validate_field, - **validators - }) - - -def validate_entity_type(entity_type: str): - entity_types = app.metadata_plugin.exposed_indices.keys() - if entity_type not in entity_types: - raise BRE(f'Entity type {entity_type!r} is invalid for catalog ' - f'{app.catalog!r}. Must be one of {set(entity_types)}.') - - -min_page_size = 1 - - -def validate_size(entity_type: EntityType, size: str): - sorting = app.metadata_plugin.exposed_indices[entity_type] - try: - size = int(size) - except BaseException: - raise BRE('Invalid value for parameter `size`') - else: - if size > sorting.max_page_size: - raise BRE(f'Invalid value for parameter `size`, ' - f'must not be greater than {sorting.max_page_size}') - elif size < min_page_size: - raise BRE('Invalid value for parameter `size`, must be greater than 0') - - -def validate_filters(filters): - filters = validate_json_param('filters', filters) - if type(filters) is not dict: - raise BRE('The `filters` parameter must be a dictionary') - field_types = app.repository_controller.field_types(app.catalog) - for field, filter_ in filters.items(): - validate_field(field, include_synthetic=True) - try: - relation, values = one(filter_.items()) - except Exception: - raise BRE(f'The `filters` parameter entry for `{field}` ' - f'must be a single-item dictionary') - else: - special_fields = app.metadata_plugin.special_fields - if field in (special_fields.source_id, special_fields.accessible): - valid_relations = ('is',) - disallow_null = True - else: - valid_relations = ('is', 'contains', 'within', 'intersects') - disallow_null = False - if relation in valid_relations: - if not isinstance(values, list): - raise BRE(f'The value of the `{relation}` relation in the `filters` ' - f'parameter entry for `{field}` is not a list') - if disallow_null and None in values: - raise BRE(f'The `{field}` field does not support null values') - else: - raise BRE(f'The relation in the `filters` parameter entry ' - f'for `{field}` must be one of {valid_relations}') - if relation == 'is': - value_types = reify(JSON | PrimitiveJSON) - if not all(isinstance(value, value_types) for value in values): - raise BRE(f'The value of the `is` relation in the `filters` ' - f'parameter entry for `{field}` is invalid') - if field == 'organismAge': - validate_organism_age_filter(values) - field_type = field_types[field] - if isinstance(field_type, Nested): - if relation != 'is': - raise BRE(f'The field `{field}` can only be filtered by the `is` relation') - try: - nested = one(values) - except ValueError: - raise BRE(f'The value of the `is` relation in the `filters` ' - f'parameter entry for `{field}` is not a single-item 
list') - try: - require(isinstance(nested, dict)) - except AssertionError as e: - if R.caused(e): - raise BRE(f'The value of the `is` relation in the `filters` ' - f'parameter entry for `{field}` must contain a dictionary') - else: - raise - extra_props = nested.keys() - field_type.properties.keys() - if extra_props: - raise BRE(f'The value of the `is` relation in the `filters` ' - f'parameter entry for `{field}` has invalid properties `{extra_props}`') - - -def validate_organism_age_filter(values): - for value in values: - try: - value_and_unit.to_index(value) - except AssertionError as e: - if R.caused(e): - raise R.propagate(e, BRE) - else: - raise - - -def validate_field(field: str, *, include_synthetic: bool = False): - fields = app.fields if include_synthetic else app.organic_fields - if field not in fields: - raise BRE(f'Unknown field `{field}`') - - -def validate_manifest_format(format: str): - supported_formats = {f.value for f in app.metadata_plugin.manifest_formats} - try: - ManifestFormat(format) - except ValueError: - raise BRE(f'Unknown manifest format `{format}`. ' - f'Must be one of {supported_formats}') - else: - if format not in supported_formats: - raise BRE(f'Manifest format `{format}` is not supported for ' - f'catalog {app.catalog}. Must be one of {supported_formats}') - - -def validate_order(order: str): - supported_orders = ('asc', 'desc') - if order not in supported_orders: - raise BRE(f'Unknown order `{order}`. Must be one of {supported_orders}') - - -def validate_json_param(name: str, value: str) -> MutableJSON: - try: - return json.loads(value) - except json.decoder.JSONDecodeError: - raise BRE(f'The {name!r} parameter is not valid JSON') - - -class Mandatory: - """ - Validation wrapper signifying that a parameter is mandatory. - """ - - def __init__(self, validator: Callable) -> None: - super().__init__() - self._validator = validator - - def __call__(self, param): - return self._validator(param) - - -deprecated_spec = { - 'summary': 'This endpoint will be removed in the future.', - 'tags': ['Deprecated'], - 'deprecated': True -} - - -@app.route( - '/index/catalogs', - methods=['GET'], - cors=True, - spec={ - 'summary': 'List all available catalogs.', - 'tags': ['Index'], - 'responses': { - '200': { - 'description': fd(''' - The name of the default catalog and a list of all available - catalogs. For each catalog, the response includes the name - of the atlas the catalog belongs to, a flag indicating - whether the catalog is for internal use only as well as the - names and types of plugins currently active for the catalog. - For some plugins, the response includes additional - configuration properties, such as the sources used by the - repository plugin to populate the catalog or the set of - available [indices][1]. - - [1]: #operations-Index-get_index__entity_type_ - '''), - **responses.json_content( - # The custom return type annotation is an experiment. Please - # don't adopt this just yet elsewhere in the program. 
- signature(app.catalog_controller.list_catalogs).return_annotation - ) - } - } - } -) -def list_catalogs(): - return app.catalog_controller.list_catalogs() - - -generic_object_spec = schema.object(additionalProperties=True) -array_of_object_spec = schema.array(generic_object_spec) -hit_spec = schema.object( - additionalProperties=True, - protocols=array_of_object_spec, - entryId=str, - sources=array_of_object_spec, - samples=array_of_object_spec, - specimens=array_of_object_spec, - cellLines=array_of_object_spec, - donorOrganisms=array_of_object_spec, - organoids=schema.array(str), - cellSuspensions=array_of_object_spec -) - -page_spec = schema.object( - hits=schema.array(hit_spec), - pagination=generic_object_spec, - termFacets=generic_object_spec -) - - -def _filter_schema(field_type: FieldType) -> JSON: - relations = field_type.supported_filter_relations - - def filter_schema(relation: str) -> JSON: - return schema.object( - properties={relation: schema.array(field_type.api_filter_schema(relation))}, - required=[relation], - additionalProperties=False - ) - - if len(relations) == 1: - return filter_schema(one(relations)) - else: - return {'oneOf': list(map(filter_schema, relations))} - - -types = app.repository_controller.field_types(app.catalog) - -filters_param_spec = params.query( - 'filters', - schema.optional(application_json(schema.object( - default='{}', - example={'cellCount': {'within': [[10000, 1000000000]]}}, - properties={ - field: _filter_schema(types[field]) - for field in app.fields - } - ))), - description=fd(''' - Criteria to filter entities from the search results. - - Each filter consists of a field name, a relation (relational operator), - and an array of field values. The available relations are "is", - "within", "contains", and "intersects". Multiple filters are combined - using "and" logic. An entity must match all filters to be included in - the response. How multiple field values within a single filter are - combined depends on the relation. - - For the "is" relation, multiple values are combined using "or" logic. - For example, `{"fileFormat": {"is": ["fastq", "fastq.gz"]}}` selects - entities where the file format is either "fastq" or "fastq.gz". For the - "within", "intersects", and "contains" relations, the field values must - come in nested pairs specifying upper and lower bounds, and multiple - pairs are combined using "and" logic. For example, `{"donorCount": - {"within": [[1,5], [5,10]]}}` selects entities whose donor organism - count falls within both ranges, i.e., is exactly 5. - - The accessions field supports filtering for a specific accession and/or - namespace within a project. For example, `{"accessions": {"is": [ - {"namespace":"array_express"}]}}` will filter for projects that have an - `array_express` accession. Similarly, `{"accessions": {"is": [ - {"accession":"ERP112843"}]}}` will filter for projects that have the - accession `ERP112843` while `{"accessions": {"is": [ - {"namespace":"array_express", "accession": "E-AAAA-00"}]}}` will filter - for projects that match both values. - - The organismAge field is special in that it contains two property keys: - value and unit. For example, `{"organismAge": {"is": [{"value": "20", - "unit": "year"}]}}`. Both keys are required. 
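Putting these rules together, a well-formed `filters` value is ordinary JSON that is serialized and URL-encoded into the query string. A sketch combining relations documented above:

```
import json
from urllib.parse import urlencode

filters = {
    'fileFormat': {'is': ['fastq', 'fastq.gz']},
    'cellCount': {'within': [[10000, 1000000000]]},
    'organismAge': {'is': [{'value': '20', 'unit': 'year'}]},
}
query_string = urlencode({'filters': json.dumps(filters)})
```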
`{"organismAge": {"is": - [null]}}` selects entities that have no organism age.''' + f''' - - Supported field names are: {', '.join(app.fields)} - ''') -) - -catalog_param_spec = params.query( - 'catalog', - schema.optional(schema.default(app.catalog, - form=schema.enum(*config.catalogs))), - description='The name of the catalog to query.') - - -def repository_search_params_spec(): - return [ - catalog_param_spec, - filters_param_spec, - params.path( - 'entity_type', - schema.enum(*app.metadata_plugin.exposed_indices.keys()), - description='Which index to search.' - ), - params.query( - 'size', - schema.optional(schema.default(10, form=schema.range(min_page_size, None))), - description=fd(''' - The number of hits included per page. The maximum size allowed - depends on the catalog and entity type. - ''') - ), - params.query( - 'sort', - schema.optional(schema.enum(*app.organic_fields)), - description=fd(''' - The field to sort the hits by. The default value depends on the - entity type. - ''') - ), - params.query( - 'order', - schema.optional(schema.enum('asc', 'desc')), - description=fd(''' - The ordering of the sorted hits, either ascending or descending. - The default value depends on the entity type. - ''') - ), - *[ - params.query( - param, - schema.optional(str), - description=fd(''' - Use the `next` and `previous` properties of the - `pagination` response element to navigate between pages. - '''), - deprecated=True) - for param in [ - 'search_before', - 'search_before_uid', - 'search_after', - 'search_after_uid' - ] - ] - ] - - -def parameter_hoisting_note(method: str, - endpoint: str, - equivalent_method: str - ) -> str: - return fd(''' - Any of the query parameters documented below can alternatively be passed - as a property of a JSON object in the body of the request. This can be - useful in case the value of the `filters` query parameter causes the URL - to exceed the maximum length of 8192 characters, resulting in a 413 - Request Entity Too Large response. - - The request `%s %s?filters={…}`, for example, is equivalent to `%s %s` - with the body `{"filters": "{…}"}` in which any double quotes or - backslash characters inside `…` are escaped with another backslash. That - escaping is the requisite procedure for embedding one JSON structure - inside another. - ''' % (method, endpoint, equivalent_method, endpoint)) - - -def repository_search_spec(*, post: bool): - id_spec_link = '#operations-Index-get_index__entity_type___entity_id_' - return { - 'summary': fd(f''' - Search an index for entities of interest - {", with filters provided in the request body" if post else ""}. - '''), - 'deprecated': post, - 'description': - iif(post, parameter_hoisting_note('GET', '/index/files', 'POST') + fd(''' - - Note that the Swagger UI can't currently be used to pass a body. - - Please also note that this endpoint should be considered beta and - may change or disappear in the future. That is the reason for the - deprecation. - ''')), - 'tags': ['Index'], - 'parameters': repository_search_params_spec(), - 'responses': { - '200': { - 'description': fd(f''' - Paginated list of entities that meet the search criteria - ("hits"). The structure of these hits is documented under - the [corresponding endpoint for a specific - entity]({id_spec_link}). - - The `pagination` section describes the total number of hits - and total number of pages, as well as user-supplied search - parameters for page size and sorting behavior. 
It also - provides links for navigating forwards and backwards between - pages of results. - - The `termFacets` section tabulates the occurrence of unique - values within nested fields of the `hits` section across all - entities meeting the filter criteria (this includes entities - not listed on the current page, meaning that this section - will be invariable across all pages from the same search). - Not every nested field is tabulated, but the set of - tabulated fields is consistent between entity types. - '''), - **responses.json_content(page_spec) - } - } - } - - -def repository_id_spec(): - search_spec_link = '#operations-Index-get_index__entity_type_' - return { - 'summary': 'Detailed information on a particular entity.', - 'tags': ['Index'], - 'parameters': [ - catalog_param_spec, - params.path('entity_type', str, description='The type of the desired entity'), - params.path('entity_id', str, description='The UUID of the desired entity') - ], - 'responses': { - '200': { - 'description': fd(f''' - This response describes a single entity. To search the index - for multiple entities, see the [corresponding search - endpoint]({search_spec_link}). - - The properties that are common to all entity types are - listed in the schema below; however, additional properties - may be present for certain entity types. With the exception - of the entity's unique identifier, all properties are - arrays, even in cases where only one value is present. - - The structures of the objects within these arrays are not - perfectly consistent, since they may represent either - singleton entities or aggregations depending on context. - - For example, any biomaterial that yields a cell suspension - which yields a sequence file will be considered a "sample". - Therefore, the `samples` field is polymorphic, and each - sample may be either a specimen, an organoid, or a cell line - (the field `sampleEntityType` can be used to discriminate - between these cases). - '''), - **responses.json_content(hit_spec) - } - } - } - - -def repository_head_spec(for_summary: bool = False): - search_spec_link = f'#operations-Index-get_index_{"summary" if for_summary else "_entity_type_"}' - return { - 'summary': 'Perform a query without returning its result.', - 'tags': ['Index'], - 'responses': { - '200': { - 'description': fd(f''' - The HEAD method can be used to test whether an index is - operational, or to check the validity of query parameters - for the [GET method]({search_spec_link}). 
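Returning to the `pagination` section described above, its `next` and `previous` properties make exhaustive retrieval a short loop. A sketch; the starting URL is a placeholder:

```
import json
from urllib.request import urlopen

# Any /index/{entity_type} URL works the same way.
url = 'https://service.example.azul.dev/index/samples?size=100'
while url is not None:
    with urlopen(url) as response:
        page = json.load(response)
    for hit in page['hits']:
        ...  # process one entity
    url = page['pagination'].get('next')  # absent on the last page
```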
- ''') - } - } - } - - -def repository_head_search_spec(): - return { - **repository_head_spec(), - 'parameters': repository_search_params_spec() - } - - -repository_summary_spec = { - 'tags': ['Index'], - 'parameters': [catalog_param_spec, filters_param_spec] -} - - -@app.route( - '/index/{entity_type}', - methods=['GET'], - spec=repository_search_spec(post=False), - cors=True -) -# FIXME: Properly document the POST version of /index -# https://github.com/DataBiosphere/azul/issues/5900 -@app.route( - '/index/{entity_type}', - methods=['POST'], - content_types=['application/json'], - spec=repository_search_spec(post=True), - cors=True -) -@app.route( - '/index/{entity_type}', - methods=['HEAD'], - spec=repository_head_search_spec(), - cors=True -) -@app.route( - '/index/{entity_type}/{entity_id}', - methods=['GET'], - spec=repository_id_spec(), - cors=True -) -def repository_search(entity_type: str, entity_id: str | None = None) -> JSON: - request = app.current_request - query_params = request.query_params or {} - _hoist_parameters(query_params, request) - validate_repository_search(entity_type, query_params) - validate_entity_type(entity_type) - return app.repository_controller.search(catalog=app.catalog, - entity_type=entity_type, - item_id=entity_id, - filters=query_params.get('filters'), - pagination=app.get_pagination(entity_type), - authentication=request.authentication) - - -def _hoist_parameters(query_params, request): - if request.method in ('POST', 'PUT'): - body = request.json_body - if body is not None: - if not isinstance(body, dict): - raise BRE('Request body is not a JSON object') - elif body.keys() & query_params.keys(): - raise BRE('Conflicting keys between body and query parameters') - else: - query_params.update(body) - - -@app.route( - '/index/summary', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Statistics on the data present across all entities.', - 'responses': { - '200': { - # FIXME: Add 'projects' to API documentation & schema - # https://github.com/DataBiosphere/azul/issues/3917 - 'description': fd(''' - Counts the total number and total size in bytes of assorted - entities, subject to the provided filters. - - `fileTypeSummaries` provides the count and total size in - bytes of files grouped by their format, e.g. "fastq" or - "matrix." `fileCount` and `totalFileSize` compile these - figures across all file formats. Likewise, - `cellCountSummaries` counts cells and their associated - documents grouped by organ type, with `organTypes` listing - all referenced organs. - - Total counts of unique entities are also provided for other - entity types such as projects and tissue donors. These - values are not grouped/aggregated. - '''), - **responses.json_content( - schema.object( - additionalProperties=True, - organTypes=schema.array(str), - totalFileSize=float, - fileTypeSummaries=array_of_object_spec, - cellCountSummaries=array_of_object_spec, - donorCount=int, - fileCount=int, - labCount=int, - projectCount=int, - speciesCount=int, - specimenCount=int - ) - ) - } - }, - **repository_summary_spec - } -) -@app.route( - '/index/summary', - methods=['HEAD'], - spec={ - **repository_head_spec(for_summary=True), - **repository_summary_spec - } -) -def get_summary(): - """ - Returns a summary based on the filters passed on to the call. Based on the - ICGC endpoint. 
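For example, the summary can be narrowed with the same `filters` parameter used elsewhere. A sketch; the URL is a placeholder and the filtered field is one documented above:

```
import json
from urllib.parse import urlencode
from urllib.request import urlopen

base = 'https://service.example.azul.dev'  # hypothetical deployment URL
filters = {'fileFormat': {'is': ['fastq']}}
query = urlencode({'filters': json.dumps(filters)})
with urlopen(f'{base}/index/summary?{query}') as response:
    summary = json.load(response)
print(summary['fileCount'], summary['totalFileSize'])
```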
- :return: Returns a jsonified Summary API response - """ - request = app.current_request - query_params = request.query_params or {} - validate_params(query_params, - filters=str, - catalog=validate_catalog) - filters = query_params.get('filters', '{}') - validate_filters(filters) - return app.repository_controller.summary(catalog=app.catalog, - filters=filters, - authentication=request.authentication) - - -def manifest_route(*, fetch: bool, initiate: bool): - return app.route( - # The path parameter could be a token *or* an object key, but we don't - # want to complicate the API with this detail - ('/fetch' if fetch else '') - + ('/manifest/files' if initiate else '/manifest/files/{token}'), - # The initial PUT request is idempotent. - methods=['PUT' if initiate else 'GET'], - interactive=fetch, - cors=True, - path_spec=None if initiate else { - 'parameters': [ - params.path('token', str, description=fd(''' - An opaque string representing the manifest preparation job - ''')) - ] - }, - spec={ - 'tags': ['Manifests'], - 'summary': - ( - 'Initiate the preparation of a manifest' - if initiate else - 'Determine status of a manifest preparation job' - ) + ( - ' via XHR' if fetch else '' - ), - 'description': fd(''' - Create a manifest preparation job, returning either - - - a 301 redirect to the URL of the status of that job or - - - a 302 redirect to the URL of an already prepared manifest. - - This endpoint is not suitable for interactive use via the - Swagger UI. Please use [PUT /fetch/manifest/files][1] instead. - - [1]: #operations-Manifests-put_fetch_manifest_files - ''') + parameter_hoisting_note('PUT', '/manifest/files', 'PUT') - if initiate and not fetch else - fd(''' - Check on the status of an ongoing manifest preparation job, - returning either - - - a 301 redirect to this endpoint if the manifest job is still - running - - - a 302 redirect to the URL of the completed manifest. - - This endpoint is not suitable for interactive use via the - Swagger UI. Please use [GET /fetch/manifest/files/{token}][1] - instead. - - [1]: #operations-Manifests-get_fetch_manifest_files - ''') if not initiate and not fetch else fd(''' - Create a manifest preparation job, returning a 200 status - response whose JSON body emulates the HTTP headers that would be - found in a response to an equivalent request to the [PUT - /manifest/files][1] endpoint. - - Whenever client-side JavaScript code is used in a web - application to request the preparation of a manifest from Azul, - this endpoint should be used instead of [PUT - /manifest/files][1]. This way, the client can use XHR to make - the request, retaining full control over the handling of - redirects and enabling the client to bypass certain limitations - on the native handling of redirects in web browsers. For - example, most browsers ignore the `Retry-After` header in - redirect responses, causing them to prematurely exhaust the - upper limit on the number of consecutive redirects, before the - manifest generation job is done. - - [1]: #operations-Manifests-put_manifest_files - ''') + parameter_hoisting_note('PUT', '/fetch/manifest/files', 'PUT') - if initiate and fetch else - fd(''' - Check on the status of an ongoing manifest preparation job, - returning a 200 status response whose JSON body emulates the - HTTP headers that would be found in a response to an equivalent - request to the [GET /manifest/files/{token}][1] endpoint. 
- - Whenever client-side JavaScript code is used in a web - application to request the preparation of a manifest from Azul, - this endpoint should be used instead of [GET - /manifest/files/{token}][1]. This way, the client can use XHR to - make the request, retaining full control over the handling of - redirects and enabling the client to bypass certain limitations - on the native handling of redirects in web browsers. For - example, most browsers ignore the `Retry-After` header in - redirect responses, causing them to prematurely exhaust the - upper limit on the number of consecutive redirects, before the - manifest generation job is done. - - [1]: #operations-Manifests-get_manifest_files - '''), - 'parameters': [ - catalog_param_spec, - filters_param_spec, - params.query( - 'format', - schema.optional( - schema.enum( - *[ - format.value - for format in app.metadata_plugin.manifest_formats - ], - form=str - ) - ), - description=f''' - The desired format of the output. - - - `{ManifestFormat.compact.value}` (the default) for a compact, - tab-separated manifest - - - `{ManifestFormat.terra_pfb.value}` for a manifest in the [PFB - format][2]. This format is mainly used for exporting data to - Terra. - - - `{ManifestFormat.curl.value}` for a [curl configuration - file][3] manifest. This manifest can be used with the curl - program to download all the files listed in the manifest. - - - `{ManifestFormat.verbatim_jsonl.value}` for a verbatim - manifest in [JSONL][4] format. Each line contains an - unaltered metadata entity from the underlying repository. - - - `{ManifestFormat.verbatim_pfb.value}` for a verbatim - manifest in the [PFB format][2]. This format is mainly - used for exporting data to Terra. - - [1]: https://software.broadinstitute.org/firecloud/documentation/article?id=10954 - - [2]: https://github.com/uc-cdis/pypfb - - [3]: https://curl.haxx.se/docs/manpage.html#-K - - [4]: https://jsonlines.org/ - ''' - ) - ] if initiate else [], - 'responses': { - '301': { - 'description': fd(f''' - A redirect indicating that the manifest preparation job - {'has started' if initiate else 'is running'}. Wait for - the recommended number of seconds (see `Retry-After` - header) and then follow the redirect to check the status - of {'that job' if initiate else 'the job again'}. - '''), - 'headers': { - 'Location': { - 'description': fd(''' - The URL of the manifest preparation job at - ''') + fd('''the [`GET - /manifest/files/{token}`][2] endpoint. - - [2]: #operations-Manifests-get_fetch_manifest_files_token - ''') if initiate else fd(''' - The URL of this endpoint - '''), - 'schema': {'type': 'string', 'format': 'url'} - }, - 'Retry-After': { - 'description': fd(''' - The recommended number of seconds to wait before - requesting the URL specified in the `Location` - header - '''), - 'schema': {'type': 'string'} - } - } - }, - '302': { - 'description': fd(f''' - A redirect indicating that the manifest preparation job - is {'already' if initiate else 'now'} done. Immediately - follow the redirect to obtain the manifest contents. - - The response body contains, for a number of commonly - used shells, a command line suitable for downloading the - manifest. - '''), - 'headers': { - 'Location': { - 'description': fd(''' The URL of the manifest. - Clients should not make any assumptions about - any parts of the returned domain, except that - the scheme will be `https`. 
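Taken together, the fetch flavor of these endpoints reduces to a short client-side polling loop. A sketch; the deployment URL is a placeholder and the `Retry-After` fallback value is an assumption:

```
import json
import time
from urllib.request import Request, urlopen

base = 'https://service.example.azul.dev'  # hypothetical deployment URL
request = Request(f'{base}/fetch/manifest/files?format=compact', method='PUT')
while True:
    with urlopen(request) as response:
        body = json.load(response)  # always a 200 with emulated headers
    if body['Status'] == 302:
        manifest_url = body['Location']  # the manifest itself
        break
    assert body['Status'] == 301
    time.sleep(body.get('Retry-After', 1))
    request = Request(body['Location'], method='GET')
```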
- '''), - 'schema': {'type': 'string', 'format': 'url'} - } - } - }, - **({} if initiate else { - '410': { - 'description': fd(''' - The manifest preparation job has expired. Request a - new preparation using the `PUT /manifest/files` - endpoint. - ''') - } - }) - } if not fetch else { - '200': { - 'description': fd(''' - When handling this response, clients should wait the - number of seconds given in the `Retry-After` property of - the response body and then make another XHR request to - the URL specified in the `Location` property. - - For a detailed description of these properties see the - documentation for the respective response headers - documented under ''') + (fd(''' - [PUT /manifest/files][1]. - - [1]: #operations-Manifests-put_manifest_files - ''') if initiate else fd(''' - [GET /manifest/files/{token}][1]. - - [1]: #operations-Manifests-get_manifest_files - ''')) + fd(''' - - Note: For a 200 status code response whose body has the - `Status` property set to 302, the `Location` property - may reference the [GET /manifest/files/{token}][2] - endpoint and that endpoint may return yet another - redirect, this time a genuine (not emulated) 302 status - redirect to the actual location of the manifest. - - [2]: #operations-Manifests-get_manifest_files - - Note: A 200 status response with a `Status` property of - 302 in its body additionally contains a `CommandLine` - property that lists, for a number of commonly used - shells, a command line suitable for downloading the - manifest. - '''), - **responses.json_content( - schema.object( - Status=int, - Location={'type': 'string', 'format': 'url'}, - **{'Retry-After': schema.optional(int)}, - CommandLine=schema.optional(schema.object(**{ - key: str - for key in CurlManifestGenerator.command_lines(url=furl(''), - file_name='', - authentication=None) - })) - ) - ), - } - } - - } - ) - - -@manifest_route(fetch=False, initiate=True) -def file_manifest(): - return _file_manifest(fetch=False) - - -@manifest_route(fetch=False, initiate=False) -def file_manifest_with_token(token: str): - return _file_manifest(fetch=False, token_or_key=token) - - -@manifest_route(fetch=True, initiate=True) -def fetch_file_manifest(): - return _file_manifest(fetch=True) - - -@manifest_route(fetch=True, initiate=False) -def fetch_file_manifest_with_token(token: str): - return _file_manifest(fetch=True, token_or_key=token) - - -def _file_manifest(fetch: bool, token_or_key: str | None = None): - request = app.current_request - query_params = request.query_params or {} - _hoist_parameters(query_params, request) - if token_or_key is None: - query_params.setdefault('filters', '{}') - # We list the `catalog` validator first so that the catalog is validated - # before any other potentially catalog-dependent validators are invoked - validate_params(query_params, - catalog=validate_catalog, - format=validate_manifest_format, - filters=validate_filters) - # Now that the catalog is valid, we can provide the default format that - # depends on it - default_format = app.metadata_plugin.manifest_formats[0].value - query_params.setdefault('format', default_format) - else: - validate_params(query_params) - return app.manifest_controller.get_manifest_async(query_params=query_params, - token_or_key=token_or_key, - fetch=fetch, - authentication=request.authentication) - - -@app.lambda_function(name=config.manifest_sfn) -def generate_manifest(event: AnyJSON, _context: LambdaContext): - assert isinstance(event, Mapping) - assert all(isinstance(k, str) for k in event.keys()) - return 
app.manifest_controller.get_manifest(event)
-
-
-file_fqid_parameters_spec = [
-    params.path(
-        'file_uuid',
-        str,
-        description='The UUID of the file to be returned.'),
-    params.query(
-        'version',
-        schema.optional(str),
-        description=fd('''
-            The version of the file to be returned. File versions are opaque
-            strings with only one documented property: they can be
-            lexicographically compared with each other in order to determine
-            which version is more recent. If this parameter is omitted then the
-            most recent version of the file is returned.
-        ''')
-    )
-]
-
-repository_files_spec = {
-    'tags': ['Repository'],
-    'parameters': [
-        catalog_param_spec,
-        *file_fqid_parameters_spec,
-        params.query(
-            'fileName',
-            schema.optional(str),
-            description=fd('''
-                The desired name of the file. The given value will be included
-                in the Content-Disposition header of the response. If absent, a
-                best effort to determine the file name from metadata will be
-                made. If that fails, the UUID of the file will be used instead.
-            ''')
-        ),
-        params.query(
-            'wait',
-            schema.optional(int),
-            description=fd('''
-                If 0, the client is responsible for honoring the waiting period
-                specified in the `Retry-After` response header. If 1, the server
-                will delay the response in order to consume as much of that
-                waiting period as possible. This parameter should only be set to
-                1 by clients who can't honor the `Retry-After` header,
-                preventing them from quickly exhausting the maximum number of
-                redirects. If the server cannot wait the full amount, any amount
-                of wait time left will still be returned in the `Retry-After`
-                header of the response.
-            ''')
-        ),
-        params.query(
-            'replica',
-            schema.optional(str),
-            description=fd('''
-                If the underlying repository offers multiple replicas of the
-                requested file, use the specified replica. Otherwise, this
-                parameter is ignored. If absent, the only replica — for
-                repositories that don't support replication — or the default
-                replica — for those that do — will be used.
-            ''')
-        ),
-        params.query(
-            'requestIndex',
-            schema.optional(int),
-            description='Do not use. Reserved for internal purposes.'
-        ),
-        params.query(
-            'drsUri',
-            schema.optional(str),
-            description='Do not use. Reserved for internal purposes.'
-        ),
-        params.query('token',
-                     schema.optional(str),
-                     description='Reserved. Do not pass explicitly.')
-    ]
-}
-
-
-@app.route(
-    '/repository/files/{file_uuid}',
-    methods=['GET'],
-    interactive=False,
-    cors=True,
-    spec={
-        **repository_files_spec,
-        'summary': 'Redirect to a URL for downloading a given data file from the '
-                   'underlying repository',
-        'description': fd('''
-            This endpoint is not suitable for interactive use via the Swagger
-            UI. Please use the [/fetch endpoint][1] instead.
-
-            [1]: #operations-Repository-get_fetch_repository_files__file_uuid_
-        '''),
-        'responses': {
-            '301': {
-                'description': fd('''
-                    A URL to the given file is still being prepared. Retry by
-                    waiting the number of seconds specified in the `Retry-After`
-                    header of the response and then requesting the URL specified
-                    in the `Location` header.
-                '''),
-                'headers': {
-                    'Location': responses.header(str, description=fd('''
-                        A URL pointing back at this endpoint, potentially with
-                        different or additional request parameters.
-                    ''')),
-                    'Retry-After': responses.header(int, description=fd('''
-                        Recommended number of seconds to wait before requesting
-                        the URL specified in the `Location` header. The response
-                        may carry this header even if server-side waiting was
-                        requested via `wait=1`.
-                    '''))
-                }
-            },
-            '302': {
-                'description': fd('''
-                    The file can be downloaded from the URL returned in the
-                    `Location` header.
-                '''),
-                'headers': {
-                    'Location': responses.header(str, description=fd('''
-                        A URL that will yield the actual content of the file.
-                    ''')),
-                    'Content-Disposition': responses.header(str, description=fd('''
-                        Set to a value that makes user agents download the file
-                        instead of rendering it, suggesting a meaningful name
-                        for the downloaded file stored on the user's file
-                        system. The suggested file name is taken from the
-                        `fileName` request parameter or, if absent, from
-                        metadata describing the file. It generally does not
-                        correlate with the path component of the URL returned in
-                        the `Location` header.
-                    '''))
-                }
-            }
-        }
-    }
-)
-def repository_files(file_uuid: str) -> Response:
-    result = _repository_files(file_uuid, fetch=False)
-    status_code = result.pop('Status')
-    return Response(body='',
-                    headers={k: str(v) for k, v in result.items()},
-                    status_code=status_code)
-
-
-@app.route(
-    '/fetch/repository/files/{file_uuid}',
-    methods=['GET'],
-    cors=True,
-    spec={
-        **repository_files_spec,
-        'summary': 'Request a URL for downloading a given data file',
-        'responses': {
-            '200': {
-                'description': fd(f'''
-                    Emulates the response code and headers of
-                    {one(repository_files.path)} while bypassing the default
-                    user agent behavior. Note that the status code of a
-                    successful response will be 200 while the `Status` field of
-                    its body will be 302.
-
-                    The response described here is intended to be processed by
-                    client-side JavaScript such that the emulated headers can be
-                    handled in JavaScript rather than relying on the native
-                    implementation by the web browser.
-                '''),
-                **responses.json_content(
-                    schema.object(
-                        Status=int,
-                        Location=str
-                    )
-                )
-            }
-        }
-    }
-)
-def fetch_repository_files(file_uuid: str) -> Response:
-    body = _repository_files(file_uuid, fetch=True)
-    return Response(body=json.dumps(body), status_code=200)
-
-
-def _repository_files(file_uuid: str, fetch: bool) -> MutableJSON:
-    request = app.current_request
-    query_params = request.query_params or {}
-    headers = request.headers
-
-    # FIXME: Prevent duplicate filenames from files in different subgraphs by
-    #        prepending the subgraph UUID to each filename when downloaded
-    #        https://github.com/DataBiosphere/azul/issues/2682
-
-    catalog = app.catalog
-    return app.repository_controller.download_file(catalog=catalog,
-                                                   fetch=fetch,
-                                                   file_uuid=file_uuid,
-                                                   query_params=query_params,
-                                                   headers=headers,
-                                                   authentication=request.authentication)
-
-
-@app.route(
-    '/repository/sources',
-    methods=['GET'],
-    cors=True,
-    spec={
-        'summary': 'List available data sources',
-        'tags': ['Repository'],
-        'parameters': [catalog_param_spec],
-        'responses': {
-            '200': {
-                'description': fd('''
-                    List the sources the currently authenticated user is
-                    authorized to access in the underlying data repository.
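An illustrative response body (both values shown are hypothetical
placeholders):

```
{
    "sources": [
        {
            "sourceId": "00000000-0000-0000-0000-000000000000",
            "sourceSpec": "example:source/spec"
        }
    ]
}
```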
-                '''),
-                **responses.json_content(
-                    schema.object(sources=schema.array(
-                        schema.object(
-                            sourceId=str,
-                            sourceSpec=str
-                        )
-                    ))
-                )
-            }
-        }
-    }
-)
-def list_sources() -> Response:
-    validate_params(app.current_request.query_params or {},
-                    catalog=validate_catalog)
-    sources = app.repository_controller.list_sources(app.catalog,
-                                                     app.current_request.authentication)
-    return Response(body={'sources': sources}, status_code=200)
-
-
-def hash_url(url):
-    url_hash = hashlib.sha1(bytes(url, encoding='utf-8')).digest()
-    return base64.urlsafe_b64encode(url_hash).decode()
-
-
-drs_spec_description = fd('''
-    This is a partial implementation of the [DRS 1.0.0 spec][1]. Not all
-    features are implemented. This endpoint acts as a DRS-compliant proxy for
-    accessing files in the underlying repository.
-
-    [1]: https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/
-
-    Any errors encountered from the underlying repository are forwarded on as
-    errors from this endpoint.
-''')
-
-
-@app.route(
-    drs.drs_object_url_path(object_id='{file_uuid}'),
-    methods=['GET'],
-    enabled=config.is_dss_enabled(),
-    cors=True,
-    spec={
-        'summary': 'Get file DRS object',
-        'tags': ['DRS'],
-        'description': fd('''
-            This endpoint returns object metadata and a list of access methods
-            that can be used to fetch object bytes.
-        ''') + drs_spec_description,
-        'parameters': file_fqid_parameters_spec,
-        'responses': {
-            '200': {
-                'description': fd(
-                    '''
-                    A DRS object is returned. Two [`AccessMethod`s][1] are
-                    included:
-
-                    [1]: {link}
-
-                    {access_methods}
-
-                    If the object is not immediately ready, an `access_id` will
-                    be returned instead of an `access_url`.
-                    ''',
-                    access_methods='\n'.join(f'- {am!s}' for am in AccessMethod),
-                    link='https://ga4gh.github.io/data-repository-service-schemas'
-                         '/preview/release/drs-1.1.0/docs/#_accessmethod'),
-                **app.drs_controller.get_object_response_schema()
-            }
-        }
-    }
-)
-def get_data_object(file_uuid):
-    """
-    Return a DRS data object dictionary for a given DSS file UUID and version.
-
-    If the file is already checked out, we can return a drs_object with a URL
-    immediately. Otherwise, we need to send the request through the /access
-    endpoint.
-    """
-    query_params = app.current_request.query_params or {}
-    validate_params(query_params, version=str)
-    return app.drs_controller.get_object(file_uuid, query_params)
-
-
-@app.route(
-    drs.drs_object_url_path(object_id='{file_uuid}', access_id='{access_id}'),
-    methods=['GET'],
-    enabled=config.is_dss_enabled(),
-    cors=True,
-    spec={
-        'summary': 'Get a file with an access ID',
-        'description': fd('''
-            This endpoint returns a URL that can be used to fetch the bytes of a
-            DRS object.
-
-            This method only needs to be called when using an `AccessMethod`
-            that contains an `access_id`.
-
-            An `access_id` is returned when the underlying file is not ready.
-            When the underlying repository is the DSS, the 202 response allows
-            time for the DSS to do a checkout.
-        ''') + drs_spec_description,
-        'parameters': [
-            *file_fqid_parameters_spec,
-            params.path('access_id', str, description='Access ID returned from a previous request')
-        ],
-        'responses': {
-            '202': {
-                'description': fd('''
-                    This response is issued if the object is not yet ready.
-                    Respect the `Retry-After` header, then try again.
-                '''),
-                'headers': {
-                    'Retry-After': responses.header(str, description=fd('''
-                        Recommended number of seconds to wait before requesting
-                        the URL specified in the `Location` header.
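A client might honor this header as follows (a minimal sketch, assuming
the `requests` library; the deployment URL and file UUID are hypothetical
placeholders, and the paths shown follow the GA4GH DRS 1.x URL layout):

```
import time

import requests

# Hypothetical deployment URL and file UUID
service = 'https://service.azul.example.org'
file_uuid = '11111111-1111-1111-1111-111111111111'

object_url = f'{service}/ga4gh/drs/v1/objects/{file_uuid}'
method = requests.get(object_url).json()['access_methods'][0]

if 'access_id' in method:
    # The object is not ready yet; poll the access endpoint, waiting
    # the number of seconds recommended by the server between attempts
    url = f"{object_url}/access/{method['access_id']}"
    while True:
        response = requests.get(url)
        if response.status_code != 202:
            break
        time.sleep(int(response.headers['Retry-After']))
        url = response.headers.get('Location', url)
    response.raise_for_status()
    url = response.json()['url']
else:
    url = method['access_url']['url']
```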
- ''')) - } - }, - '200': { - 'description': fd(''' - The object is ready. The URL is in the response object. - '''), - **responses.json_content(schema.object(url=str)) - } - }, - 'tags': ['DRS'] - } -) -def get_data_object_access(file_uuid, access_id): - query_params = app.current_request.query_params or {} - validate_params(query_params, version=str) - return app.drs_controller.get_object_access(access_id, file_uuid, query_params) - - -@app.route( - drs.dos_object_url_path('{file_uuid}'), - methods=['GET'], - enabled=config.is_dss_enabled(), - cors=True, - spec=deprecated_spec -) -def dos_get_data_object(file_uuid): - """ - Return a DRS data object dictionary for a given DSS file UUID and version. - """ - request = app.current_request - query_params = request.query_params or {} - validate_params(query_params, - version=str, - catalog=validate_catalog) - catalog = app.catalog - file_version = query_params.get('version') - return app.drs_controller.dos_get_object(catalog, - file_uuid, - file_version, - request.authentication) diff --git a/lambdas/service/openapi.json b/lambdas/service/openapi.json deleted file mode 100644 index cd07900746..0000000000 --- a/lambdas/service/openapi.json +++ /dev/null @@ -1,11835 +0,0 @@ -{ - "openapi": "3.0.1", - "info": { - "title": "azul-service-dev", - "version": "13.0", - "description": "\n# Overview\n\nAzul is a REST web service for querying metadata associated with\nboth experimental and analysis data from a data repository. In order\nto deliver response times that make it suitable for interactive use\ncases, the set of metadata properties that it exposes for sorting,\nfiltering, and aggregation is limited. Azul provides a uniform view\nof the metadata over a range of diverse schemas, effectively\nshielding clients from changes in the schemas as they occur over\ntime. It does so, however, at the expense of detail in the set of\nmetadata properties it exposes and in the accuracy with which it\naggregates them.\n\nAzul denormalizes and aggregates metadata into several different\nindices for selected entity types. Metadata entities can be queried\nusing the [Index](#operations-tag-Index) endpoints.\n\nA set of indices forms a catalog. There is a default catalog called\n`dcp2` which will be used unless a\ndifferent catalog name is specified using the `catalog` query\nparameter. Metadata from different catalogs is completely\nindependent: a response obtained by querying one catalog does not\nnecessarily correlate to a response obtained by querying another\none. Two catalogs can contain metadata from the same sources or\ndifferent sources. It is only guaranteed that the body of a\nresponse by any given endpoint adheres to one schema,\nindependently of which catalog was specified in the request.\n\nAzul provides the ability to download data and metadata via the\n[Manifests](#operations-tag-Manifests) endpoints. The\n`curl` format manifests can be used to\ndownload data files. Other formats provide various views of the\nmetadata. Manifests can be generated for a selection of files using\nfilters. These filters are interchangeable with the filters used by\nthe [Index](#operations-tag-Index) endpoints.\n\nAzul also provides a [summary](#operations-Index-get_index_summary)\nview of indexed data.\n\n## Data model\n\nAny index, when queried, returns a JSON array of hits. Each hit\nrepresents a metadata entity. Nested in each hit is a summary of the\nproperties of entities associated with the hit. 
An entity is\nassociated either by a direct edge in the original metadata graph,\nor indirectly as a series of edges. The nested properties are\ngrouped by the type of the associated entity. The properties of all\ndata files associated with a particular sample, for example, are\nlisted under `hits[*].files` in a `/index/samples` response. It is\nimportant to note that while each _hit_ represents a discrete\nentity, the properties nested within that hit are the result of an\naggregation over potentially many associated entities.\n\nTo illustrate this, consider a data file that is part of two\nprojects (a project is a group of related experiments, typically by\none laboratory, institution or consortium). Querying the `files`\nindex for this file yields a hit looking something like:\n\n```\n{\n \"projects\": [\n {\n \"projectTitle\": \"Project One\"\n \"laboratory\": ...,\n ...\n },\n {\n \"projectTitle\": \"Project Two\"\n \"laboratory\": ...,\n ...\n }\n ],\n \"files\": [\n {\n \"format\": \"pdf\",\n \"name\": \"Team description.pdf\",\n ...\n }\n ]\n}\n```\n\nThis example hit contains two kinds of nested entities (a hit in an\nactual Azul response will contain more): There are the two projects\nentities, and the file itself. These nested entities contain\nselected metadata properties extracted in a consistent way. This\nmakes filtering and sorting simple.\n\nAlso notice that there is only one file. When querying a particular\nindex, the corresponding entity will always be a singleton like\nthis.\n\n\n## Contact us\n\nFor technical support please file an issue at\n[GitHub](https://github.com/DataBiosphere/azul/issues) or email\n`azul-group@ucsc.edu`. To report a security concern or misconduct please email\n`azul-group@ucsc.edu`.\n" - }, - "tags": [ - { - "name": "Index", - "description": "\nQuery the indices for entities of interest\n" - }, - { - "name": "Manifests", - "description": "\nComplete listing of files matching a given filter in TSV and\nother formats\n" - }, - { - "name": "Repository", - "description": "\nAccess to data files in the underlying repository\n" - }, - { - "name": "Auxiliary", - "description": "\nDescribes various aspects of the Azul service\n" - } - ], - "paths": { - "/": { - "get": { - "summary": "Redirect to the Swagger UI for interactive use of this REST API", - "tags": [ - "Auxiliary" - ], - "responses": { - "301": { - "description": "A redirect to the Swagger UI" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/swagger/index.html": { - "get": { - "summary": "The Swagger UI for interactive use of this REST API", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "The response body is an HTML page containing the Swagger UI" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/swagger/swagger-initializer.js": { - "get": { - "summary": "Used internally by the Swagger UI", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "The response body is JavaScript used internally by the Swagger UI" - }, - "504": { - "description": "\nRequest timed out. 
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/swagger/{file}": { - "parameters": [ - { - "name": "file", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "The name of a static file to be returned" - } - ], - "get": { - "summary": "Static files needed for the Swagger UI", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "The response body is the contents of the requested file" - }, - "404": { - "description": "The requested file does not exist" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/openapi.json": { - "get": { - "summary": "Return OpenAPI specifications for this REST API", - "description": "\nThis endpoint returns the [OpenAPI specifications]'\n(https://github.com/OAI/OpenAPI-Specification) for this REST\nAPI. These are the specifications used to generate the page\nyou are visiting now.\n", - "responses": { - "200": { - "description": "200 response", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "openapi": { - "type": "string" - }, - "info": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "tags": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "servers": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "paths": { - "type": "object", - "properties": {}, - "additionalProperties": false - }, - "components": { - "type": "object", - "properties": {}, - "additionalProperties": false - } - }, - "required": [ - "openapi", - "info", - "tags", - "servers", - "paths", - "components" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/version": { - "get": { - "summary": "Describe current version of this REST API", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "Version endpoint is reachable.", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "git": { - "type": "object", - "properties": { - "commit": { - "type": "string" - }, - "dirty": { - "type": "boolean" - } - }, - "required": [ - "commit", - "dirty" - ], - "additionalProperties": false - } - }, - "required": [ - "git" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/robots.txt": { - "get": { - "summary": "Robots Exclusion Protocol", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "\nThe robots.txt resource according to\n[RFC9309](https://datatracker.ietf.org/doc/html/rfc9309)\n" - }, - "504": { - "description": "\nRequest timed out. 
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/health": { - "get": { - "summary": "Complete health check", - "description": "\nHealth check of the service REST API and all\nresources it depends on. This may take long time to complete\nand exerts considerable load on the API. For that reason it\nshould not be requested frequently or by automated\nmonitoring facilities that would be better served by the\n[`/health/fast`](#operations-Auxiliary-get_health_fast) or\n[`/health/cached`](#operations-Auxiliary-get_health_cached)\nendpoints.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. 
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/basic": { - "get": { - "summary": "Basic health check", - "description": "\nHealth check of only the REST API itself, excluding other\nresources that it depends on. A 200 response indicates that\nthe service is reachable via HTTP(S) but nothing\nmore.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/cached": { - "get": { - "summary": "Cached health check for continuous monitoring", - "description": "\nReturn a cached copy of the\n[`/health/fast`](#operations-Auxiliary-get_health_fast)\nresponse. This endpoint is optimized for continuously\nrunning, distributed health monitors such as Route 53 health\nchecks. The cache ensures that the service is not\noverloaded by these types of health monitors. 
The cache is\nupdated every minute.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "api_endpoints": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "api_endpoints": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/fast": { - "get": { - "summary": "Fast health check", - "description": "\nPerformance-optimized health check of the REST API and other\ncritical resources tht it depends on. 
This endpoint can be\nrequested more frequently than\n[`/health`](#operations-Auxiliary-get_health) but\nperiodically scheduled, automated requests should be made to\n[`/health/cached`](#operations-Auxiliary-get_health_cached).\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "api_endpoints": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "elasticsearch": {}, - "api_endpoints": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/health/{keys}": { - "parameters": [ - { - "name": "keys", - "in": "path", - "required": true, - "schema": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "api_endpoints", - "elasticsearch", - "other_lambdas", - "progress", - "queues" - ] - } - }, - "description": "\nA comma-separated list of keys selecting the health\nchecks to be performed. Each key corresponds to an\nentry in the response.\n" - } - ], - "get": { - "summary": "Selective health check", - "description": "\nThis endpoint allows clients to request a health check on a\nspecific set of resources. 
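For example, `GET /health/elasticsearch,queues` would check only the Elasticsearch cluster and the SQS queues.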
Each resource is identified by a\n*key*, the same key under which the resource appears in a\n[`/health`](#operations-Auxiliary-get_health) response.\n", - "responses": { - "200": { - "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - true - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": true - } - } - } - }, - "503": { - "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer and the\nmirror.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": { - "type": "object", - "properties": { - "up": { - "type": "boolean", - "enum": [ - false - ] - } - }, - "required": [ - "up" - ], - "additionalProperties": true - } - }, - "example": { - "other_lambdas": {}, - "queues": {}, - "progress": {}, - "api_endpoints": {}, - "elasticsearch": {}, - "up": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Auxiliary" - ] - } - }, - "/oauth2_redirect": { - "get": { - "summary": "Destination endpoint for Google OAuth 2.0 redirects", - "tags": [ - "Auxiliary" - ], - "responses": { - "200": { - "description": "\nThe response body is HTML page with a script that extracts\nthe access token and redirects back to the Swagger UI.\n" - }, - "504": { - "description": "\nRequest timed out. 
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/index/catalogs": { - "get": { - "summary": "List all available catalogs.", - "tags": [ - "Index" - ], - "responses": { - "200": { - "description": "\nThe name of the default catalog and a list of all available\ncatalogs. For each catalog, the response includes the name\nof the atlas the catalog belongs to, a flag indicating\nwhether the catalog is for internal use only as well as the\nnames and types of plugins currently active for the catalog.\nFor some plugins, the response includes additional\nconfiguration properties, such as the sources used by the\nrepository plugin to populate the catalog or the set of\navailable [indices][1].\n\n[1]: #operations-Index-get_index__entity_type_\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "default_catalog": { - "type": "string" - }, - "catalogs": { - "type": "object", - "properties": {}, - "additionalProperties": { - "type": "object", - "properties": { - "atlas": { - "type": "string" - }, - "internal": { - "type": "boolean" - }, - "plugins": { - "type": "object", - "properties": {}, - "additionalProperties": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "sources": { - "type": "array", - "items": { - "type": "string" - } - }, - "indices": { - "type": "object", - "properties": {}, - "additionalProperties": { - "type": "object", - "properties": { - "default_sort": { - "type": "string" - }, - "default_order": { - "type": "string" - } - }, - "required": [ - "default_sort", - "default_order" - ], - "additionalProperties": false - } - } - }, - "required": [ - "name" - ], - "additionalProperties": false - } - } - }, - "required": [ - "atlas", - "internal", - "plugins" - ], - "additionalProperties": false - } - } - }, - "required": [ - "default_catalog", - "catalogs" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/index/{entity_type}/{entity_id}": { - "get": { - "summary": "Detailed information on a particular entity.", - "tags": [ - "Index" - ], - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." - }, - { - "name": "entity_type", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "The type of the desired entity" - }, - { - "name": "entity_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "The UUID of the desired entity" - } - ], - "responses": { - "200": { - "description": "\nThis response describes a single entity. To search the index\nfor multiple entities, see the [corresponding search\nendpoint](#operations-Index-get_index__entity_type_).\n\nThe properties that are common to all entity types are\nlisted in the schema below; however, additional properties\nmay be present for certain entity types. 
With the exception\nof the entity's unique identifier, all properties are\narrays, even in cases where only one value is present.\n\nThe structures of the objects within these arrays are not\nperfectly consistent, since they may represent either\nsingleton entities or aggregations depending on context.\n\nFor example, any biomaterial that yields a cell suspension\nwhich yields a sequence file will be considered a \"sample\".\nTherefore, the `samples` field is polymorphic, and each\nsample may be either a specimen, an organoid, or a cell line\n(the field `sampleEntityType` can be used to discriminate\nbetween these cases).\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "protocols": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "entryId": { - "type": "string" - }, - "sources": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "samples": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "specimens": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "cellLines": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "donorOrganisms": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "organoids": { - "type": "array", - "items": { - "type": "string" - } - }, - "cellSuspensions": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - } - }, - "required": [ - "protocols", - "entryId", - "sources", - "samples", - "specimens", - "cellLines", - "donorOrganisms", - "organoids", - "cellSuspensions" - ], - "additionalProperties": true - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/index/{entity_type}": { - "head": { - "summary": "Perform a query without returning its result.", - "tags": [ - "Index" - ], - "responses": { - "200": { - "description": "\nThe HEAD method can be used to test whether an index is\noperational, or to check the validity of query parameters\nfor the [GET method](#operations-Index-get_index__entity_type_).\n" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." 
- }, - { - "name": "filters", - "in": "query", - "required": false, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "accessions": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "namespace": { - "type": "string", - "nullable": true - }, - "accession": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateLastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateSubmissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateUpdateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "assayType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "biologicalSex": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bionetworkName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleUuid": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "cellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "cellLineType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contactName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contentDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "dataUseRestriction": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "developmentStage": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "donorCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "donorDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "duosId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "effectiveCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "effectiveOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "entryId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileFormat": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileSize": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "fileSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "genusSpecies": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "institution": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "instrumentManufacturerModel": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isIntermediate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isTissueAtlasProject": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "laboratory": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "lastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "libraryConstructionApproach": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "matrixCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "modelOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "modelOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "nucleicAcidSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organ": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organPart": { - "type": "object", - 
"properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAge": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "unit": { - "type": "string" - } - }, - "required": [ - "value", - "unit" - ], - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAgeRange": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number", - "format": "double" - }, - { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - ] - } - } - }, - "required": [ - "contains" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "intersects" - ], - "additionalProperties": false - } - ] - }, - "pairedEnd": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "preservationMethod": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "project": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectEstimatedCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "projectId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - 
"nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "publicationTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleEntityType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "selectedCellType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceSpec": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "submissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "tissueAtlas": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "atlas": { - "type": "string", - "nullable": true - }, - "version": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "updateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "workflow": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "accessible": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - } - }, - "default": "{}", - "example": { - "cellCount": { - "within": [ - [ - 10000, - 1000000000 - ] - ] - } 
- } - } - } - }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n" - }, - { - "name": "entity_type", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": [ - "bundles", - "files", - "projects", - "samples" - ] - }, - "description": "Which index to search." - }, - { - "name": "size", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int64", - "minimum": 1, - "default": 10 - }, - "description": "\nThe number of hits included per page. 
The maximum size allowed\ndepends on the catalog and entity type.\n" - }, - { - "name": "sort", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "accessions", - "aggregateLastModifiedDate", - "aggregateSubmissionDate", - "aggregateUpdateDate", - "assayType", - "biologicalSex", - "bionetworkName", - "bundleUuid", - "bundleVersion", - "cellCount", - "cellLineType", - "contactName", - "contentDescription", - "dataUseRestriction", - "developmentStage", - "donorCount", - "donorDisease", - "duosId", - "effectiveCellCount", - "effectiveOrgan", - "entryId", - "fileFormat", - "fileId", - "fileName", - "fileSize", - "fileSource", - "fileVersion", - "genusSpecies", - "institution", - "instrumentManufacturerModel", - "isIntermediate", - "isTissueAtlasProject", - "laboratory", - "lastModifiedDate", - "libraryConstructionApproach", - "matrixCellCount", - "modelOrgan", - "modelOrganPart", - "nucleicAcidSource", - "organ", - "organPart", - "organismAge", - "organismAgeRange", - "pairedEnd", - "preservationMethod", - "project", - "projectDescription", - "projectEstimatedCellCount", - "projectId", - "projectTitle", - "publicationTitle", - "sampleDisease", - "sampleEntityType", - "sampleId", - "selectedCellType", - "sourceId", - "sourceSpec", - "specimenDisease", - "specimenOrgan", - "specimenOrganPart", - "submissionDate", - "tissueAtlas", - "updateDate", - "workflow" - ] - }, - "description": "\nThe field to sort the hits by. The default value depends on the\nentity type.\n" - }, - { - "name": "order", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "asc", - "desc" - ] - }, - "description": "\nThe ordering of the sorted hits, either ascending or descending.\nThe default value depends on the entity type.\n" - }, - { - "name": "search_before", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_before_uid", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_after", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_after_uid", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - } - ] - }, - "post": { - "summary": "\nSearch an index for entities of interest\n, with filters provided in the request body.\n", - "deprecated": true, - "description": "\nAny of the query parameters documented below can alternatively be passed\nas a property of a JSON object in the body of the request. 
This can be\nuseful in case the value of the `filters` query parameter causes the URL\nto exceed the maximum length of 8192 characters, resulting in a 413\nRequest Entity Too Large response.\n\nThe request `GET /index/files?filters={\u2026}`, for example, is equivalent to `POST /index/files`\nwith the body `{\"filters\": \"{\u2026}\"}` in which any double quotes or\nbackslash characters inside `\u2026` are escaped with another backslash. That\nescaping is the requisite procedure for embedding one JSON structure\ninside another.\n\n\nNote that the Swagger UI can't currently be used to pass a body.\n\nPlease also note that this endpoint should be considered beta and\nmay change or disappear in the future. That is the reason for the\ndeprecation.\n", - "tags": [ - "Index" - ], - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." - }, - { - "name": "filters", - "in": "query", - "required": false, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "accessions": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "namespace": { - "type": "string", - "nullable": true - }, - "accession": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateLastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateSubmissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateUpdateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "assayType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "biologicalSex": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bionetworkName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleUuid": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "cellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - 
"type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "cellLineType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contactName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contentDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "dataUseRestriction": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "developmentStage": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "donorCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "donorDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "duosId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "effectiveCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "effectiveOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "entryId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileFormat": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileId": { - "type": "object", - "properties": { - 
"is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileSize": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "fileSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "genusSpecies": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "institution": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "instrumentManufacturerModel": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isIntermediate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isTissueAtlasProject": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "laboratory": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "lastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "libraryConstructionApproach": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "matrixCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - 
"maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "modelOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "modelOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "nucleicAcidSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organ": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAge": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "unit": { - "type": "string" - } - }, - "required": [ - "value", - "unit" - ], - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAgeRange": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number", - "format": "double" - }, - { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - ] - } - } - }, - "required": [ - "contains" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "intersects" - ], - "additionalProperties": false - } - ] - }, - "pairedEnd": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "preservationMethod": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "project": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - 
"type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectEstimatedCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "projectId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "publicationTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleEntityType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "selectedCellType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceSpec": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "submissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "tissueAtlas": { - "type": "object", - "properties": { - "is": { - "type": "array", - 
"items": { - "type": "object", - "properties": { - "atlas": { - "type": "string", - "nullable": true - }, - "version": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "updateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "workflow": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "accessible": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - } - }, - "default": "{}", - "example": { - "cellCount": { - "within": [ - [ - 10000, - 1000000000 - ] - ] - } - } - } - } - }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. 
`{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n" - }, - { - "name": "entity_type", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": [ - "bundles", - "files", - "projects", - "samples" - ] - }, - "description": "Which index to search." - }, - { - "name": "size", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int64", - "minimum": 1, - "default": 10 - }, - "description": "\nThe number of hits included per page. The maximum size allowed\ndepends on the catalog and entity type.\n" - }, - { - "name": "sort", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "accessions", - "aggregateLastModifiedDate", - "aggregateSubmissionDate", - "aggregateUpdateDate", - "assayType", - "biologicalSex", - "bionetworkName", - "bundleUuid", - "bundleVersion", - "cellCount", - "cellLineType", - "contactName", - "contentDescription", - "dataUseRestriction", - "developmentStage", - "donorCount", - "donorDisease", - "duosId", - "effectiveCellCount", - "effectiveOrgan", - "entryId", - "fileFormat", - "fileId", - "fileName", - "fileSize", - "fileSource", - "fileVersion", - "genusSpecies", - "institution", - "instrumentManufacturerModel", - "isIntermediate", - "isTissueAtlasProject", - "laboratory", - "lastModifiedDate", - "libraryConstructionApproach", - "matrixCellCount", - "modelOrgan", - "modelOrganPart", - "nucleicAcidSource", - "organ", - "organPart", - "organismAge", - "organismAgeRange", - "pairedEnd", - "preservationMethod", - "project", - "projectDescription", - "projectEstimatedCellCount", - "projectId", - "projectTitle", - "publicationTitle", - "sampleDisease", - "sampleEntityType", - "sampleId", - "selectedCellType", - "sourceId", - "sourceSpec", - "specimenDisease", - "specimenOrgan", - "specimenOrganPart", - "submissionDate", - "tissueAtlas", - "updateDate", - "workflow" - ] - }, - "description": "\nThe field to sort the hits by. 
The default value depends on the\nentity type.\n" - }, - { - "name": "order", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "asc", - "desc" - ] - }, - "description": "\nThe ordering of the sorted hits, either ascending or descending.\nThe default value depends on the entity type.\n" - }, - { - "name": "search_before", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_before_uid", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_after", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_after_uid", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - } - ], - "responses": { - "200": { - "description": "\nPaginated list of entities that meet the search criteria\n(\"hits\"). The structure of these hits is documented under\nthe [corresponding endpoint for a specific\nentity](#operations-Index-get_index__entity_type___entity_id_).\n\nThe `pagination` section describes the total number of hits\nand total number of pages, as well as user-supplied search\nparameters for page size and sorting behavior. 
It also\nprovides links for navigating forwards and backwards between\npages of results.\n\nThe `termFacets` section tabulates the occurrence of unique\nvalues within nested fields of the `hits` section across all\nentities meeting the filter criteria (this includes entities\nnot listed on the current page, meaning that this section\nwill be invariable across all pages from the same search).\nNot every nested field is tabulated, but the set of\ntabulated fields is consistent between entity types.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "hits": { - "type": "array", - "items": { - "type": "object", - "properties": { - "protocols": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "entryId": { - "type": "string" - }, - "sources": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "samples": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "specimens": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "cellLines": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "donorOrganisms": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "organoids": { - "type": "array", - "items": { - "type": "string" - } - }, - "cellSuspensions": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - } - }, - "required": [ - "protocols", - "entryId", - "sources", - "samples", - "specimens", - "cellLines", - "donorOrganisms", - "organoids", - "cellSuspensions" - ], - "additionalProperties": true - } - }, - "pagination": { - "type": "object", - "properties": {}, - "additionalProperties": true - }, - "termFacets": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "required": [ - "hits", - "pagination", - "termFacets" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - }, - "get": { - "summary": "\nSearch an index for entities of interest\n.\n", - "deprecated": false, - "description": "", - "tags": [ - "Index" - ], - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." 
- }, - { - "name": "filters", - "in": "query", - "required": false, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "accessions": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "namespace": { - "type": "string", - "nullable": true - }, - "accession": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateLastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateSubmissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateUpdateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "assayType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "biologicalSex": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bionetworkName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleUuid": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "cellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "cellLineType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contactName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contentDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "dataUseRestriction": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "developmentStage": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "donorCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "donorDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "duosId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "effectiveCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "effectiveOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "entryId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileFormat": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileSize": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "fileSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "genusSpecies": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "institution": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "instrumentManufacturerModel": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isIntermediate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isTissueAtlasProject": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "laboratory": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "lastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "libraryConstructionApproach": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "matrixCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "modelOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "modelOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "nucleicAcidSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organ": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organPart": { - "type": "object", - 
"properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAge": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "unit": { - "type": "string" - } - }, - "required": [ - "value", - "unit" - ], - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAgeRange": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number", - "format": "double" - }, - { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - ] - } - } - }, - "required": [ - "contains" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "intersects" - ], - "additionalProperties": false - } - ] - }, - "pairedEnd": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "preservationMethod": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "project": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectEstimatedCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "projectId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - 
"nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "publicationTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleEntityType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "selectedCellType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceSpec": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "submissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "tissueAtlas": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "atlas": { - "type": "string", - "nullable": true - }, - "version": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "updateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "workflow": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "accessible": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - } - }, - "default": "{}", - "example": { - "cellCount": { - "within": [ - [ - 10000, - 1000000000 - ] - ] - } 
- } - } - } - }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n" - }, - { - "name": "entity_type", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": [ - "bundles", - "files", - "projects", - "samples" - ] - }, - "description": "Which index to search." - }, - { - "name": "size", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int64", - "minimum": 1, - "default": 10 - }, - "description": "\nThe number of hits included per page. 
The maximum size allowed\ndepends on the catalog and entity type.\n" - }, - { - "name": "sort", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "accessions", - "aggregateLastModifiedDate", - "aggregateSubmissionDate", - "aggregateUpdateDate", - "assayType", - "biologicalSex", - "bionetworkName", - "bundleUuid", - "bundleVersion", - "cellCount", - "cellLineType", - "contactName", - "contentDescription", - "dataUseRestriction", - "developmentStage", - "donorCount", - "donorDisease", - "duosId", - "effectiveCellCount", - "effectiveOrgan", - "entryId", - "fileFormat", - "fileId", - "fileName", - "fileSize", - "fileSource", - "fileVersion", - "genusSpecies", - "institution", - "instrumentManufacturerModel", - "isIntermediate", - "isTissueAtlasProject", - "laboratory", - "lastModifiedDate", - "libraryConstructionApproach", - "matrixCellCount", - "modelOrgan", - "modelOrganPart", - "nucleicAcidSource", - "organ", - "organPart", - "organismAge", - "organismAgeRange", - "pairedEnd", - "preservationMethod", - "project", - "projectDescription", - "projectEstimatedCellCount", - "projectId", - "projectTitle", - "publicationTitle", - "sampleDisease", - "sampleEntityType", - "sampleId", - "selectedCellType", - "sourceId", - "sourceSpec", - "specimenDisease", - "specimenOrgan", - "specimenOrganPart", - "submissionDate", - "tissueAtlas", - "updateDate", - "workflow" - ] - }, - "description": "\nThe field to sort the hits by. The default value depends on the\nentity type.\n" - }, - { - "name": "order", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "asc", - "desc" - ] - }, - "description": "\nThe ordering of the sorted hits, either ascending or descending.\nThe default value depends on the entity type.\n" - }, - { - "name": "search_before", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_before_uid", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_after", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - }, - { - "name": "search_after_uid", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nUse the `next` and `previous` properties of the\n`pagination` response element to navigate between pages.\n", - "deprecated": true - } - ], - "responses": { - "200": { - "description": "\nPaginated list of entities that meet the search criteria\n(\"hits\"). The structure of these hits is documented under\nthe [corresponding endpoint for a specific\nentity](#operations-Index-get_index__entity_type___entity_id_).\n\nThe `pagination` section describes the total number of hits\nand total number of pages, as well as user-supplied search\nparameters for page size and sorting behavior. 
It also\nprovides links for navigating forwards and backwards between\npages of results.\n\nThe `termFacets` section tabulates the occurrence of unique\nvalues within nested fields of the `hits` section across all\nentities meeting the filter criteria (this includes entities\nnot listed on the current page, meaning that this section\nwill be invariable across all pages from the same search).\nNot every nested field is tabulated, but the set of\ntabulated fields is consistent between entity types.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "hits": { - "type": "array", - "items": { - "type": "object", - "properties": { - "protocols": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "entryId": { - "type": "string" - }, - "sources": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "samples": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "specimens": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "cellLines": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "donorOrganisms": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "organoids": { - "type": "array", - "items": { - "type": "string" - } - }, - "cellSuspensions": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - } - }, - "required": [ - "protocols", - "entryId", - "sources", - "samples", - "specimens", - "cellLines", - "donorOrganisms", - "organoids", - "cellSuspensions" - ], - "additionalProperties": true - } - }, - "pagination": { - "type": "object", - "properties": {}, - "additionalProperties": true - }, - "termFacets": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "required": [ - "hits", - "pagination", - "termFacets" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/index/summary": { - "head": { - "summary": "Perform a query without returning its result.", - "tags": [ - "Index" - ], - "responses": { - "200": { - "description": "\nThe HEAD method can be used to test whether an index is\noperational, or to check the validity of query parameters\nfor the [GET method](#operations-Index-get_index_summary).\n" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." 
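The filter and pagination contract documented above lends itself to a
short client sketch. The following is a minimal, illustrative sketch
only, not part of the spec: the base URL is a placeholder, the
`requests` library is assumed, and it takes the deprecated `search_*`
parameter descriptions at their word that the `next` property of the
`pagination` element is a ready-to-use URL for the following page. Per
the 504 description, a timed-out request is retried after the delay
given in the `Retry-After` header::

    import json
    import time

    import requests

    BASE_URL = 'https://service.example.org'  # placeholder host


    def get_with_retry(url, params=None):
        # Per the 504 response: wait the number of seconds given in the
        # Retry-After header, then retry the request.
        while True:
            response = requests.get(url, params=params)
            if response.status_code != 504:
                response.raise_for_status()
                return response.json()
            time.sleep(int(response.headers['Retry-After']))


    def iterate_hits(entity_type, filters, size=10):
        # The filters object is serialized to JSON and passed in the
        # `filters` query parameter. Multiple filters are combined with
        # 'and' logic; values listed under 'is' with 'or' logic.
        params = {
            'catalog': 'dcp2',
            'filters': json.dumps(filters),
            'size': size
        }
        url = BASE_URL + '/index/' + entity_type
        while url is not None:
            body = get_with_retry(url, params=params)
            yield from body['hits']
            # Navigate via the `next` link of the `pagination` element
            # instead of the deprecated `search_after` parameters. The
            # link is assumed to carry its own query parameters.
            url = body['pagination'].get('next')
            params = None


    filters = {
        'fileFormat': {'is': ['fastq', 'fastq.gz']},
        'donorCount': {'within': [[1, 10]]}
    }
    for hit in iterate_hits('files', filters):
        print(hit['entryId'])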
- }, - { - "name": "filters", - "in": "query", - "required": false, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "accessions": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "namespace": { - "type": "string", - "nullable": true - }, - "accession": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateLastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateSubmissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateUpdateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "assayType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "biologicalSex": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bionetworkName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleUuid": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "cellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "cellLineType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contactName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contentDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "dataUseRestriction": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "developmentStage": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "donorCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "donorDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "duosId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "effectiveCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "effectiveOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "entryId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileFormat": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileSize": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "fileSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "genusSpecies": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "institution": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "instrumentManufacturerModel": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isIntermediate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isTissueAtlasProject": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "laboratory": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "lastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "libraryConstructionApproach": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "matrixCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "modelOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "modelOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "nucleicAcidSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organ": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organPart": { - "type": "object", - 
"properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAge": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "unit": { - "type": "string" - } - }, - "required": [ - "value", - "unit" - ], - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAgeRange": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number", - "format": "double" - }, - { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - ] - } - } - }, - "required": [ - "contains" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "intersects" - ], - "additionalProperties": false - } - ] - }, - "pairedEnd": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "preservationMethod": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "project": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectEstimatedCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "projectId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - 
"nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "publicationTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleEntityType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "selectedCellType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceSpec": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "submissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "tissueAtlas": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "atlas": { - "type": "string", - "nullable": true - }, - "version": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "updateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "workflow": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "accessible": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - } - }, - "default": "{}", - "example": { - "cellCount": { - "within": [ - [ - 10000, - 1000000000 - ] - ] - } 
- } - } - } - }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n" - } - ] - }, - "get": { - "summary": "Statistics on the data present across all entities.", - "responses": { - "200": { - "description": "\nCounts the total number and total size in bytes of assorted\nentities, subject to the provided filters.\n\n`fileTypeSummaries` provides the count and total size in\nbytes of files grouped by their format, e.g. \"fastq\" or\n\"matrix.\" `fileCount` and `totalFileSize` compile these\nfigures across all file formats. 
Likewise,\n`cellCountSummaries` counts cells and their associated\ndocuments grouped by organ type, with `organTypes` listing\nall referenced organs.\n\nTotal counts of unique entities are also provided for other\nentity types such as projects and tissue donors. These\nvalues are not grouped/aggregated.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "organTypes": { - "type": "array", - "items": { - "type": "string" - } - }, - "totalFileSize": { - "type": "number", - "format": "double" - }, - "fileTypeSummaries": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "cellCountSummaries": { - "type": "array", - "items": { - "type": "object", - "properties": {}, - "additionalProperties": true - } - }, - "donorCount": { - "type": "integer", - "format": "int64" - }, - "fileCount": { - "type": "integer", - "format": "int64" - }, - "labCount": { - "type": "integer", - "format": "int64" - }, - "projectCount": { - "type": "integer", - "format": "int64" - }, - "speciesCount": { - "type": "integer", - "format": "int64" - }, - "specimenCount": { - "type": "integer", - "format": "int64" - } - }, - "required": [ - "organTypes", - "totalFileSize", - "fileTypeSummaries", - "cellCountSummaries", - "donorCount", - "fileCount", - "labCount", - "projectCount", - "speciesCount", - "specimenCount" - ], - "additionalProperties": true - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - }, - "tags": [ - "Index" - ], - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." 
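As a usage sketch for the summary endpoint (same placeholder base URL
as before; the species value is merely illustrative), the `filters`
parameter accepts the identical syntax, and the HEAD operation
described above can first be used to validate the query without
transferring the result body::

    import json

    import requests

    BASE_URL = 'https://service.example.org'  # placeholder host

    filters = {'genusSpecies': {'is': ['Homo sapiens']}}
    params = {'catalog': 'dcp2', 'filters': json.dumps(filters)}

    # The HEAD operation checks that the index is operational and that
    # the query parameters are valid, without returning the summary.
    check = requests.head(BASE_URL + '/index/summary', params=params)
    check.raise_for_status()

    summary = requests.get(BASE_URL + '/index/summary',
                           params=params).json()
    print(summary['projectCount'], 'projects,',
          summary['fileCount'], 'files,',
          summary['totalFileSize'], 'bytes in total')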
- }, - { - "name": "filters", - "in": "query", - "required": false, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "accessions": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "namespace": { - "type": "string", - "nullable": true - }, - "accession": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateLastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateSubmissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateUpdateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "assayType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "biologicalSex": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bionetworkName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleUuid": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "cellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "cellLineType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contactName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contentDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "dataUseRestriction": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "developmentStage": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "donorCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "donorDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "duosId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "effectiveCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "effectiveOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "entryId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileFormat": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileSize": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "fileSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": 
"string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "genusSpecies": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "institution": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "instrumentManufacturerModel": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isIntermediate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isTissueAtlasProject": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "laboratory": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "lastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "libraryConstructionApproach": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "matrixCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "modelOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "modelOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "nucleicAcidSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organ": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organPart": { - "type": "object", - 
"properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAge": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "unit": { - "type": "string" - } - }, - "required": [ - "value", - "unit" - ], - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAgeRange": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number", - "format": "double" - }, - { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - ] - } - } - }, - "required": [ - "contains" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "intersects" - ], - "additionalProperties": false - } - ] - }, - "pairedEnd": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "preservationMethod": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "project": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectEstimatedCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "projectId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - 
"nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "publicationTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleEntityType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "selectedCellType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceSpec": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "submissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "tissueAtlas": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "atlas": { - "type": "string", - "nullable": true - }, - "version": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "updateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "workflow": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "accessible": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - } - }, - "default": "{}", - "example": { - "cellCount": { - "within": [ - [ - 10000, - 1000000000 - ] - ] - } 
- } - } - } - }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n" - } - ] - } - }, - "/manifest/files": { - "put": { - "tags": [ - "Manifests" - ], - "summary": "Initiate the preparation of a manifest", - "description": "\nCreate a manifest preparation job, returning either\n\n- a 301 redirect to the URL of the status of that job or\n\n- a 302 redirect to the URL of an already prepared manifest.\n\nThis endpoint is not suitable for interactive use via the\nSwagger UI. 
Please use [PUT /fetch/manifest/files][1] instead.\n\n[1]: #operations-Manifests-put_fetch_manifest_files\n\nAny of the query parameters documented below can alternatively be passed\nas a property of a JSON object in the body of the request. This can be\nuseful in case the value of the `filters` query parameter causes the URL\nto exceed the maximum length of 8192 characters, resulting in a 413\nRequest Entity Too Large response.\n\nThe request `PUT /manifest/files?filters={\u2026}`, for example, is equivalent to `PUT /manifest/files`\nwith the body `{\"filters\": \"{\u2026}\"}` in which any double quotes or\nbackslash characters inside `\u2026` are escaped with another backslash. That\nescaping is the requisite procedure for embedding one JSON structure\ninside another.\n", - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." - }, - { - "name": "filters", - "in": "query", - "required": false, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "accessions": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "namespace": { - "type": "string", - "nullable": true - }, - "accession": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateLastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateSubmissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateUpdateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "assayType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "biologicalSex": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bionetworkName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleUuid": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "cellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - 
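A client sketch for this endpoint, under the same assumptions as the
earlier sketches (placeholder base URL; redirects handled manually so
that both outcomes named in the description are visible). Note that
`json.dumps` produces exactly the escaping the description calls for
when the filters are embedded as a string property of the request
body::

    import json

    import requests

    BASE_URL = 'https://service.example.org'  # placeholder host

    filters = {'fileFormat': {'is': ['fastq.gz']}}

    # Passing `filters` as a body property sidesteps the 8192-character
    # URL limit. Serializing the filters with json.dumps and embedding
    # the resulting string in the body escapes any double quotes or
    # backslashes automatically.
    body = {'filters': json.dumps(filters)}
    response = requests.put(BASE_URL + '/manifest/files',
                            json=body,
                            allow_redirects=False)

    # A 301 points at the status of a newly created preparation job; a
    # 302 points at an already prepared manifest.
    assert response.status_code in (301, 302)
    print(response.status_code, response.headers['Location'])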
"type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "cellLineType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contactName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contentDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "dataUseRestriction": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "developmentStage": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "donorCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "donorDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "duosId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "effectiveCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "effectiveOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "entryId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileFormat": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileId": { - "type": "object", - "properties": { - 
"is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileSize": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "fileSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "genusSpecies": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "institution": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "instrumentManufacturerModel": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isIntermediate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isTissueAtlasProject": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "laboratory": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "lastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "libraryConstructionApproach": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "matrixCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - 
"maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "modelOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "modelOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "nucleicAcidSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organ": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAge": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "unit": { - "type": "string" - } - }, - "required": [ - "value", - "unit" - ], - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAgeRange": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number", - "format": "double" - }, - { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - ] - } - } - }, - "required": [ - "contains" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "intersects" - ], - "additionalProperties": false - } - ] - }, - "pairedEnd": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "preservationMethod": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "project": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - 
"type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectEstimatedCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "projectId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "publicationTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleEntityType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "selectedCellType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceSpec": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "submissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "tissueAtlas": { - "type": "object", - "properties": { - "is": { - "type": "array", - 
"items": { - "type": "object", - "properties": { - "atlas": { - "type": "string", - "nullable": true - }, - "version": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "updateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "workflow": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "accessible": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - } - }, - "default": "{}", - "example": { - "cellCount": { - "within": [ - [ - 10000, - 1000000000 - ] - ] - } - } - } - } - }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. 
`{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n" - }, - { - "name": "format", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "compact", - "terra.pfb", - "curl", - "verbatim.jsonl", - "verbatim.pfb" - ] - }, - "description": "\nThe desired format of the output.\n\n- `compact` (the default) for a compact,\n tab-separated manifest\n\n- `terra.pfb` for a manifest in the [PFB\n format][2]. This format is mainly used for exporting data to\n Terra.\n\n- `curl` for a [curl configuration\n file][3] manifest. This manifest can be used with the curl\n program to download all the files listed in the manifest.\n\n- `verbatim.jsonl` for a verbatim\n manifest in [JSONL][4] format. Each line contains an\n unaltered metadata entity from the underlying repository.\n\n- `verbatim.pfb` for a verbatim\n manifest in the [PFB format][2]. This format is mainly\n used for exporting data to Terra.\n\n[1]: https://software.broadinstitute.org/firecloud/documentation/article?id=10954\n\n[2]: https://github.com/uc-cdis/pypfb\n\n[3]: https://curl.haxx.se/docs/manpage.html#-K\n\n[4]: https://jsonlines.org/\n" - } - ], - "responses": { - "301": { - "description": "\nA redirect indicating that the manifest preparation job\nhas started. Wait for\nthe recommended number of seconds (see `Retry-After`\nheader) and then follow the redirect to check the status\nof that job.\n", - "headers": { - "Location": { - "description": "\nThe URL of the manifest preparation job at\nthe [`GET\n /manifest/files/{token}`][2] endpoint.\n\n [2]: #operations-Manifests-get_fetch_manifest_files_token\n", - "schema": { - "type": "string", - "format": "url" - } - }, - "Retry-After": { - "description": "\nThe recommended number of seconds to wait before\nrequesting the URL specified in the `Location`\nheader\n", - "schema": { - "type": "string" - } - } - } - }, - "302": { - "description": "\nA redirect indicating that the manifest preparation job\nis already done. Immediately\nfollow the redirect to obtain the manifest contents.\n\nThe response body contains, for a number of commonly\nused shells, a command line suitable for downloading the\nmanifest.\n", - "headers": { - "Location": { - "description": "The URL of the manifest.\n Clients should not make any assumptions about\n any parts of the returned domain, except that\n the scheme will be `https`.\n", - "schema": { - "type": "string", - "format": "url" - } - } - } - }, - "504": { - "description": "\nRequest timed out. 
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/manifest/files/{token}": { - "parameters": [ - { - "name": "token", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "\nAn opaque string representing the manifest preparation job\n" - } - ], - "get": { - "tags": [ - "Manifests" - ], - "summary": "Determine status of a manifest preparation job", - "description": "\nCheck on the status of an ongoing manifest preparation job,\nreturning either\n\n- a 301 redirect to this endpoint if the manifest job is still\n running\n\n- a 302 redirect to the URL of the completed manifest.\n\nThis endpoint is not suitable for interactive use via the\nSwagger UI. Please use [GET /fetch/manifest/files/{token}][1]\ninstead.\n\n[1]: #operations-Manifests-get_fetch_manifest_files\n", - "parameters": [], - "responses": { - "301": { - "description": "\nA redirect indicating that the manifest preparation job\nis running. Wait for\nthe recommended number of seconds (see `Retry-After`\nheader) and then follow the redirect to check the status\nof the job again.\n", - "headers": { - "Location": { - "description": "\nThe URL of this endpoint\n", - "schema": { - "type": "string", - "format": "url" - } - }, - "Retry-After": { - "description": "\nThe recommended number of seconds to wait before\nrequesting the URL specified in the `Location`\nheader\n", - "schema": { - "type": "string" - } - } - } - }, - "302": { - "description": "\nA redirect indicating that the manifest preparation job\nis now done. Immediately\nfollow the redirect to obtain the manifest contents.\n\nThe response body contains, for a number of commonly\nused shells, a command line suitable for downloading the\nmanifest.\n", - "headers": { - "Location": { - "description": "The URL of the manifest.\n Clients should not make any assumptions about\n any parts of the returned domain, except that\n the scheme will be `https`.\n", - "schema": { - "type": "string", - "format": "url" - } - } - } - }, - "410": { - "description": "\nThe manifest preparation job has expired. Request a\nnew preparation using the `PUT /manifest/files`\nendpoint.\n" - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/fetch/manifest/files": { - "put": { - "tags": [ - "Manifests" - ], - "summary": "Initiate the preparation of a manifest via XHR", - "description": "\nCreate a manifest preparation job, returning a 200 status\nresponse whose JSON body emulates the HTTP headers that would be\nfound in a response to an equivalent request to the [PUT\n/manifest/files][1] endpoint.\n\nWhenever client-side JavaScript code is used in a web\napplication to request the preparation of a manifest from Azul,\nthis endpoint should be used instead of [PUT\n/manifest/files][1]. This way, the client can use XHR to make\nthe request, retaining full control over the handling of\nredirects and enabling the client to bypass certain limitations\non the native handling of redirects in web browsers. 
For\nexample, most browsers ignore the `Retry-After` header in\nredirect responses, causing them to prematurely exhaust the\nupper limit on the number of consecutive redirects, before the\nmanifest generation job is done.\n\n[1]: #operations-Manifests-put_manifest_files\n\nAny of the query parameters documented below can alternatively be passed\nas a property of a JSON object in the body of the request. This can be\nuseful in case the value of the `filters` query parameter causes the URL\nto exceed the maximum length of 8192 characters, resulting in a 413\nRequest Entity Too Large response.\n\nThe request `PUT /fetch/manifest/files?filters={\u2026}`, for example, is equivalent to `PUT /fetch/manifest/files`\nwith the body `{\"filters\": \"{\u2026}\"}` in which any double quotes or\nbackslash characters inside `\u2026` are escaped with another backslash. That\nescaping is the requisite procedure for embedding one JSON structure\ninside another.\n", - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." - }, - { - "name": "filters", - "in": "query", - "required": false, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "accessions": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "namespace": { - "type": "string", - "nullable": true - }, - "accession": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateLastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateSubmissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "aggregateUpdateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "assayType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "biologicalSex": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bionetworkName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleUuid": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "bundleVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "cellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - 
"format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "cellLineType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contactName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "contentDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "dataUseRestriction": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "developmentStage": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "donorCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "donorDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "duosId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "effectiveCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "effectiveOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "entryId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileFormat": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": 
{ - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileName": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileSize": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "fileSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "fileVersion": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "genusSpecies": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "institution": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "instrumentManufacturerModel": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isIntermediate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "isTissueAtlasProject": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "laboratory": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "lastModifiedDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "libraryConstructionApproach": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "matrixCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": 
"object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "modelOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "modelOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "nucleicAcidSource": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organ": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAge": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "unit": { - "type": "string" - } - }, - "required": [ - "value", - "unit" - ], - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "organismAgeRange": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number", - "format": "double" - }, - { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - ] - } - } - }, - "required": [ - "contains" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "number", - "format": "double" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "intersects" - ], - "additionalProperties": false - } - ] - }, - "pairedEnd": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "preservationMethod": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "project": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - 
"required": [ - "is" - ], - "additionalProperties": false - }, - "projectDescription": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectEstimatedCellCount": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "integer", - "format": "int64", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer", - "format": "int64" - }, - "minItems": 2, - "maxItems": 2 - } - } - }, - "required": [ - "within" - ], - "additionalProperties": false - } - ] - }, - "projectId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "projectTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "publicationTitle": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleEntityType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sampleId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "selectedCellType": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceId": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "sourceSpec": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenDisease": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrgan": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "specimenOrganPart": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "submissionDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": 
true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "tissueAtlas": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "object", - "properties": { - "atlas": { - "type": "string", - "nullable": true - }, - "version": { - "type": "string", - "nullable": true - } - }, - "additionalProperties": false - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "updateDate": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "workflow": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "string", - "nullable": true - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - "accessible": { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": { - "type": "boolean" - } - } - }, - "required": [ - "is" - ], - "additionalProperties": false - } - }, - "default": "{}", - "example": { - "cellCount": { - "within": [ - [ - 10000, - 1000000000 - ] - ] - } - } - } - } - }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. 
`{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n" - }, - { - "name": "format", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "compact", - "terra.pfb", - "curl", - "verbatim.jsonl", - "verbatim.pfb" - ] - }, - "description": "\nThe desired format of the output.\n\n- `compact` (the default) for a compact,\n tab-separated manifest\n\n- `terra.pfb` for a manifest in the [PFB\n format][2]. This format is mainly used for exporting data to\n Terra.\n\n- `curl` for a [curl configuration\n file][3] manifest. This manifest can be used with the curl\n program to download all the files listed in the manifest.\n\n- `verbatim.jsonl` for a verbatim\n manifest in [JSONL][4] format. Each line contains an\n unaltered metadata entity from the underlying repository.\n\n- `verbatim.pfb` for a verbatim\n manifest in the [PFB format][2]. 
This format is mainly\n used for exporting data to Terra.\n\n[1]: https://software.broadinstitute.org/firecloud/documentation/article?id=10954\n\n[2]: https://github.com/uc-cdis/pypfb\n\n[3]: https://curl.haxx.se/docs/manpage.html#-K\n\n[4]: https://jsonlines.org/\n" - } - ], - "responses": { - "200": { - "description": "\nWhen handling this response, clients should wait the\nnumber of seconds given in the `Retry-After` property of\nthe response body and then make another XHR request to\nthe URL specified in the `Location` property.\n\nFor a detailed description of these properties see the\ndocumentation for the respective response headers\ndocumented under \n[PUT /manifest/files][1].\n\n[1]: #operations-Manifests-put_manifest_files\n\n\nNote: For a 200 status code response whose body has the\n`Status` property set to 302, the `Location` property\nmay reference the [GET /manifest/files/{token}][2]\nendpoint and that endpoint may return yet another\nredirect, this time a genuine (not emulated) 302 status\nredirect to the actual location of the manifest.\n\n[2]: #operations-Manifests-get_manifest_files\n\nNote: A 200 status response with a `Status` property of\n302 in its body additionally contains a `CommandLine`\nproperty that lists, for a number of commonly used\nshells, a command line suitable for downloading the\nmanifest.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "Status": { - "type": "integer", - "format": "int64" - }, - "Location": { - "type": "string", - "format": "url" - }, - "Retry-After": { - "type": "integer", - "format": "int64" - }, - "CommandLine": { - "type": "object", - "properties": { - "cmd.exe": { - "type": "string" - }, - "bash": { - "type": "string" - } - }, - "required": [ - "cmd.exe", - "bash" - ], - "additionalProperties": false - } - }, - "required": [ - "Status", - "Location" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/fetch/manifest/files/{token}": { - "parameters": [ - { - "name": "token", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "\nAn opaque string representing the manifest preparation job\n" - } - ], - "get": { - "tags": [ - "Manifests" - ], - "summary": "Determine status of a manifest preparation job via XHR", - "description": "\nCheck on the status of an ongoing manifest preparation job,\nreturning a 200 status response whose JSON body emulates the\nHTTP headers that would be found in a response to an equivalent\nrequest to the [GET /manifest/files/{token}][1] endpoint.\n\nWhenever client-side JavaScript code is used in a web\napplication to request the preparation of a manifest from Azul,\nthis endpoint should be used instead of [GET\n/manifest/files/{token}][1]. This way, the client can use XHR to\nmake the request, retaining full control over the handling of\nredirects and enabling the client to bypass certain limitations\non the native handling of redirects in web browsers. 
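Because the /fetch endpoints wrap the redirect protocol in a 200 response
body, a client drives them by inspecting the `Status` property rather than
the HTTP status code. The following sketch uses only the Python standard
library; in production this logic would typically live in the client-side
JavaScript these endpoints were designed for::

    import json
    import time
    from urllib.request import urlopen

    def await_fetch_manifest(status_url: str) -> dict:
        """
        Drive the emulated redirect protocol of the /fetch endpoints.
        The returned body's 'Location' is the manifest URL and its
        'CommandLine' property holds ready-made download commands.
        """
        while True:
            with urlopen(status_url) as response:
                body = json.load(response)
            if body['Status'] == 301:
                time.sleep(body.get('Retry-After', 1))
                status_url = body['Location']
            elif body['Status'] == 302:
                return body
            else:
                raise RuntimeError('Unexpected status', body['Status'])

The same loop also handles the body returned by the initial `PUT
/fetch/manifest/files` request, since both endpoints emulate the same
headers.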
For\nexample, most browsers ignore the `Retry-After` header in\nredirect responses, causing them to prematurely exhaust the\nupper limit on the number of consecutive redirects, before the\nmanifest generation job is done.\n\n[1]: #operations-Manifests-get_manifest_files\n", - "parameters": [], - "responses": { - "200": { - "description": "\nWhen handling this response, clients should wait the\nnumber of seconds given in the `Retry-After` property of\nthe response body and then make another XHR request to\nthe URL specified in the `Location` property.\n\nFor a detailed description of these properties see the\ndocumentation for the respective response headers\ndocumented under \n[GET /manifest/files/{token}][1].\n\n[1]: #operations-Manifests-get_manifest_files\n\n\nNote: For a 200 status code response whose body has the\n`Status` property set to 302, the `Location` property\nmay reference the [GET /manifest/files/{token}][2]\nendpoint and that endpoint may return yet another\nredirect, this time a genuine (not emulated) 302 status\nredirect to the actual location of the manifest.\n\n[2]: #operations-Manifests-get_manifest_files\n\nNote: A 200 status response with a `Status` property of\n302 in its body additionally contains a `CommandLine`\nproperty that lists, for a number of commonly used\nshells, a command line suitable for downloading the\nmanifest.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "Status": { - "type": "integer", - "format": "int64" - }, - "Location": { - "type": "string", - "format": "url" - }, - "Retry-After": { - "type": "integer", - "format": "int64" - }, - "CommandLine": { - "type": "object", - "properties": { - "cmd.exe": { - "type": "string" - }, - "bash": { - "type": "string" - } - }, - "required": [ - "cmd.exe", - "bash" - ], - "additionalProperties": false - } - }, - "required": [ - "Status", - "Location" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/repository/files/{file_uuid}": { - "get": { - "tags": [ - "Repository" - ], - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." - }, - { - "name": "file_uuid", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "The UUID of the file to be returned." - }, - { - "name": "version", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nThe version of the file to be returned. File versions are opaque\nstrings with only one documented property: they can be\nlexicographically compared with each other in order to determine\nwhich version is more recent. If this parameter is omitted then the\nmost recent version of the file is returned.\n" - }, - { - "name": "fileName", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nThe desired name of the file. The given value will be included\nin the Content-Disposition header of the response. If absent, a\nbest effort to determine the file name from metadata will be\nmade. 
If that fails, the UUID of the file will be used instead.\n" - }, - { - "name": "wait", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int64" - }, - "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n" - }, - { - "name": "replica", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nIf the underlying repository offers multiple replicas of the\nrequested file, use the specified replica. Otherwise, this\nparameter is ignored. If absent, the only replica \u2014 for\nrepositories that don't support replication \u2014 or the default\nreplica \u2014 for those that do \u2014 will be used.\n" - }, - { - "name": "requestIndex", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int64" - }, - "description": "Do not use. Reserved for internal purposes." - }, - { - "name": "drsUri", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "Do not use. Reserved for internal purposes." - }, - { - "name": "token", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "Reserved. Do not pass explicitly." - } - ], - "summary": "Redirect to a URL for downloading a given data file from the underlying repository", - "description": "\nThis endpoint is not suitable for interactive use via the Swagger\nUI. Please use the [/fetch endpoint][1] instead.\n\n[1]: #operations-Repository-get_fetch_repository_files__file_uuid_\n", - "responses": { - "301": { - "description": "\nA URL to the given file is still being prepared. Retry by\nwaiting the number of seconds specified in the `Retry-After`\nheader of the response and then requesting the URL specified\nin the `Location` header.\n", - "headers": { - "Location": { - "schema": { - "type": "string" - }, - "description": "\nA URL pointing back at this endpoint, potentially with\ndifferent or additional request parameters.\n" - }, - "Retry-After": { - "schema": { - "type": "integer", - "format": "int64" - }, - "description": "\nRecommended number of seconds to wait before requesting\nthe URL specified in the `Location` header. The response\nmay carry this header even if server-side waiting was\nrequested via `wait=1`.\n" - } - } - }, - "302": { - "description": "\nThe file can be downloaded from the URL returned in the\n`Location` header.\n", - "headers": { - "Location": { - "schema": { - "type": "string" - }, - "description": "\nA URL that will yield the actual content of the file.\n" - }, - "Content-Disposition": { - "schema": { - "type": "string" - }, - "description": "\nSet to a value that makes user agents download the file\ninstead of rendering it, suggesting a meaningful name\nfor the downloaded file stored on the user's file\nsystem. The suggested file name is taken from the\n`fileName` request parameter or, if absent, from\nmetadata describing the file. 
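The `wait` parameter described above caters to clients that follow
redirects natively and therefore cannot sleep between hops. A sketch of
that mode, using only the standard library; the host and UUID are
placeholders::

    from urllib.request import urlopen

    file_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder
    url = ('https://service.azul.example.org'           # placeholder host
           f'/repository/files/{file_uuid}?catalog=dcp2&wait=1')
    # urlopen follows the 301 redirects and the final 302 natively,
    # without honoring Retry-After. With wait=1 the server absorbs most
    # of the waiting period, so the client's redirect limit is unlikely
    # to be exhausted.
    with urlopen(url) as response:
        data = response.read()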
It generally does not\ncorrelate with the path component of the URL returned in\nthe `Location` header.\n" - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/fetch/repository/files/{file_uuid}": { - "get": { - "tags": [ - "Repository" - ], - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." - }, - { - "name": "file_uuid", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "The UUID of the file to be returned." - }, - { - "name": "version", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nThe version of the file to be returned. File versions are opaque\nstrings with only one documented property: they can be\nlexicographically compared with each other in order to determine\nwhich version is more recent. If this parameter is omitted then the\nmost recent version of the file is returned.\n" - }, - { - "name": "fileName", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nThe desired name of the file. The given value will be included\nin the Content-Disposition header of the response. If absent, a\nbest effort to determine the file name from metadata will be\nmade. If that fails, the UUID of the file will be used instead.\n" - }, - { - "name": "wait", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int64" - }, - "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n" - }, - { - "name": "replica", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "\nIf the underlying repository offers multiple replicas of the\nrequested file, use the specified replica. Otherwise, this\nparameter is ignored. If absent, the only replica \u2014 for\nrepositories that don't support replication \u2014 or the default\nreplica \u2014 for those that do \u2014 will be used.\n" - }, - { - "name": "requestIndex", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int64" - }, - "description": "Do not use. Reserved for internal purposes." - }, - { - "name": "drsUri", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "Do not use. Reserved for internal purposes." - }, - { - "name": "token", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "Reserved. Do not pass explicitly." - } - ], - "summary": "Request a URL for downloading a given data file", - "responses": { - "200": { - "description": "\nEmulates the response code and headers of\n/repository/files/{file_uuid} while bypassing the default\nuser agent behavior. 
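For the /fetch variant of the file endpoint, a single request yields a 200
response whose JSON body carries the emulated `Status` and `Location`. A
sketch, reusing the placeholder host and `file_uuid` from the previous
example::

    import json
    from urllib.request import urlopen

    fetch_url = ('https://service.azul.example.org'     # placeholder host
                 f'/fetch/repository/files/{file_uuid}?catalog=dcp2&wait=1')
    with urlopen(fetch_url) as response:
        body = json.load(response)
    if body['Status'] == 302:
        with urlopen(body['Location']) as download:
            data = download.read()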
Note that the status code of a\nsuccessful response will be 200 while the `Status` field of\nits body will be 302.\n\nThe response described here is intended to be processed by\nclient-side Javascript such that the emulated headers can be\nhandled in Javascript rather than relying on the native\nimplementation by the web browser.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "Status": { - "type": "integer", - "format": "int64" - }, - "Location": { - "type": "string" - } - }, - "required": [ - "Status", - "Location" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - }, - "/repository/sources": { - "get": { - "summary": "List available data sources", - "tags": [ - "Repository" - ], - "parameters": [ - { - "name": "catalog", - "in": "query", - "required": false, - "schema": { - "type": "string", - "enum": [ - "dcp2" - ], - "default": "dcp2" - }, - "description": "The name of the catalog to query." - } - ], - "responses": { - "200": { - "description": "\nList the sources the currently authenticated user is\nauthorized to access in the underlying data repository.\n", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "sources": { - "type": "array", - "items": { - "type": "object", - "properties": { - "sourceId": { - "type": "string" - }, - "sourceSpec": { - "type": "string" - } - }, - "required": [ - "sourceId", - "sourceSpec" - ], - "additionalProperties": false - } - } - }, - "required": [ - "sources" - ], - "additionalProperties": false - } - } - } - }, - "504": { - "description": "\nRequest timed out. 
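Listing the sources the current user may access is a plain JSON request
against the schema above. A sketch with a placeholder host::

    import json
    from urllib.request import urlopen

    url = 'https://service.azul.example.org/repository/sources?catalog=dcp2'
    with urlopen(url) as response:
        for source in json.load(response)['sources']:
            print(source['sourceId'], source['sourceSpec'])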
When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n" - } - } - } - } - }, - "servers": [ - { - "url": "http://localhost/" - } - ], - "components": { - "securitySchemes": { - "azul-service-dev": { - "type": "oauth2", - "flows": { - "implicit": { - "authorizationUrl": "https://accounts.google.com/o/oauth2/auth", - "scopes": { - "email": "email" - } - } - } - } - } - }, - "security": [ - {}, - { - "azul-service-dev": [ - "email" - ] - } - ] -} \ No newline at end of file diff --git a/lambdas/service/vendor/azul b/lambdas/service/vendor/azul deleted file mode 120000 index b3310b2b5d..0000000000 --- a/lambdas/service/vendor/azul +++ /dev/null @@ -1 +0,0 @@ -../../../src/azul \ No newline at end of file diff --git a/lambdas/service/vendor/humancellatlas b/lambdas/service/vendor/humancellatlas deleted file mode 120000 index bf4bcf48ff..0000000000 --- a/lambdas/service/vendor/humancellatlas +++ /dev/null @@ -1 +0,0 @@ -../../../src/humancellatlas \ No newline at end of file diff --git a/lambdas/service/vendor/resources/environ.json.template.py b/lambdas/service/vendor/resources/environ.json.template.py deleted file mode 100644 index f77298a3aa..0000000000 --- a/lambdas/service/vendor/resources/environ.json.template.py +++ /dev/null @@ -1,8 +0,0 @@ -from azul import ( - config, -) -from azul.template import ( - emit, -) - -emit(config.lambda_env_for_outsourcing) diff --git a/lambdas/service/vendor/resources/static/swagger b/lambdas/service/vendor/resources/static/swagger deleted file mode 120000 index 7c782ec5ff..0000000000 --- a/lambdas/service/vendor/resources/static/swagger +++ /dev/null @@ -1 +0,0 @@ -../../../../../swagger/ \ No newline at end of file diff --git a/schemas/mirror/info/v1.json b/schemas/mirror/info/v1.json deleted file mode 100644 index fa7429aa4a..0000000000 --- a/schemas/mirror/info/v1.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "info", - "description": "Information describing a file mirrored by Azul", - "type": "object", - "properties": { - "content-type": { - "type": "string", - "description": "Content type of the mirrored file, as defined for the HTTP response header of the same name" - }, - "$schema": { - "type": "string", - "format": "uri", - "pattern": "^https?://.*/info/v\\d+\\.json$", - "description": "URL of a JSON schema the JSON containing this property is valid against" - } - }, - "required": [ - "content-type", - "$schema" - ] -} \ No newline at end of file diff --git a/security.txt b/security.txt deleted file mode 100644 index 4420b84af9..0000000000 --- a/security.txt +++ /dev/null @@ -1,2 +0,0 @@ -If you'd like to report a security issue please contact us -Contact: security-leads@data.humancellatlas.org \ No newline at end of file diff --git a/src/azul/__main__.py b/src/azul/__main__.py deleted file mode 100644 index 37befb1f1c..0000000000 --- a/src/azul/__main__.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Evaluate an expression after 'from azul import config, docker' and either print -the result or return it via the process exit status. 
-""" -import argparse -import logging -import sys - -from azul import ( - config, - docker, -) -from azul.logging import ( - configure_script_logging, -) - -log = logging.getLogger(__name__) -configure_script_logging() -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('expression', - help='the Python expression to evaluate') -group = parser.add_mutually_exclusive_group() -for status in (True, False): - lower = str(status).lower() - group.add_argument('--' + lower, '-' + lower[0], - dest='status', - default=None, - action='store_' + lower, - help=f'do not print the result of the evaluation but instead ' - f'exit with a status of 0 if the result is {status}-ish or ' - f'a non-zero exit status otherwise.') -args = parser.parse_args(sys.argv[1:]) -locals = dict(config=config, docker=docker) -result = eval(args.expression, dict(__builtins__={}), locals) -log.info('Expression %r evaluated to %r', args.expression, result) -if args.status is None: - print(result) -else: - sys.exit(0 if bool(result) == args.status else 1) diff --git a/src/azul/attrs.py b/src/azul/attrs.py deleted file mode 100644 index 2c35934f4e..0000000000 --- a/src/azul/attrs.py +++ /dev/null @@ -1,860 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from itertools import ( - count, -) -import logging -from types import ( - UnionType, -) -from typing import ( - Any, - Callable, - Iterator, - Optional, - Self, - Tuple, - TypeAliasType, - TypeVar, - TypedDict, - Union, - final, - get_args, - get_origin, -) -from uuid import ( - UUID, -) - -import attrs -from more_itertools import ( - flatten, - one, -) - -from azul import ( - R, - cached_property, - config, - require, -) -from azul.json import ( - PolymorphicSerializable, - Serializable, -) -from azul.types import ( - AnyJSON, - CompositeJSON, - JSON, - JSONArray, - MutableCompositeJSON, - MutableJSON, - MutableJSONArray, - PrimitiveJSON, - derived_type_params, - json_mapping, - not_none, - reify, -) - -log = logging.getLogger(__name__) - - -def strict_auto(*args, **kwargs): - """ - A field that uses the annotated type for validation. - - See :func:`as_annotated` for details - """ - return attrs.field(*args, validator=as_annotated(), **kwargs) - - -def as_annotated(): - """ - Returns a validator that verifies that a field's value is of the annotated - type. Has some limited magic for parameterized types such as typing.Union - and typing.Optional. - - >>> from azul.types import AnyJSON - >>> @attrs.define - ... class Foo: - ... x: Optional[bool] = strict_auto() - ... y: AnyJSON = strict_auto() - - >>> Foo(x=None, y={}), Foo(x=True, y=[]), Foo(x=False, y='foo') - (Foo(x=None, y={}), Foo(x=True, y=[]), Foo(x=False, y='foo')) - - >>> # noinspection PyTypeChecker - >>> Foo(x='foo', y={}) - Traceback (most recent call last): - ... - TypeError: ('x', 'foo', (, )) - - >>> # noinspection PyTypeChecker - >>> Foo(x=None, y=set()) - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - TypeError: ('y', set(), (, - , , , - , , )) - - Note that you cannot share one return value of this function between more - than one field. - - >>> validator = as_annotated() - >>> @attrs.define - ... class Bar: - ... x: int = attrs.field(validator=validator) - ... y: str = attrs.field(validator=validator) - >>> Bar(x=1, y='') - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... 
- azul.RequirementError: ('Validator cannot be shared among fields', - Attribute(name='x', default=NOTHING, validator=as_annotated(), repr=True, - eq=True, eq_key=None, order=True, order_key=None, hash=None, init=True, - metadata=mappingproxy({}), type=<class 'int'>, converter=None, - kw_only=False, inherited=False, on_setattr=None, alias='x'), - Attribute(name='y', default=NOTHING, validator=as_annotated(), repr=True, - eq=True, eq_key=None, order=True, order_key=None, hash=None, init=True, - metadata=mappingproxy({}), type=<class 'str'>, converter=None, - kw_only=False, inherited=False, on_setattr=None, alias='y')) - - Unfortunately, this sharing violation is currently detected very late, - during the first instantiation of a class that reuses a validator. - - >>> validator = as_annotated() - >>> @attrs.define - ... class Bar: - ... x: int = attrs.field(validator=validator) - >>> @attrs.define - ... class Foo: - ... y: str = attrs.field(validator=validator) - >>> Bar(x=1) - Bar(x=1) - >>> Foo(y='') - ... # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - azul.RequirementError: ('Validator cannot be shared among fields', ... - - """ - return _AsAnnotated() - - -class _AsAnnotated: - _cache: Optional[Tuple[attrs.Attribute, Union[type, Tuple[type]]]] = None - - def __call__(self, _instance, field, value): - reified_type = self._reify(field) - if not isinstance(value, reified_type): - raise TypeError(field.name, value, reified_type) - - def _reify(self, field): - # reify() isn't exactly cheap so we'll cache its result - if self._cache is None: - reified_types = reify(field.type) - self._cache = field, reified_types - else: - cached_field, reified_types = self._cache - require(cached_field == field, - 'Validator cannot be shared among fields', cached_field, field) - return reified_types - - def __repr__(self): - return 'as_annotated()' - - -def is_uuid(version): - def validator(_instance, field, value): - if not isinstance(value, UUID) or value.version != version: - raise TypeError(f'Not a UUID{version}', field.name, value) - - return validator - - -type Source = list[str | tuple[str, ...] | Source] - -type FromJSON = Callable[[AnyJSON], Any] -type ToJSON = Callable[[Any], AnyJSON] - - -class SerializableAttrs(Serializable, attrs.AttrsInstance): - """ - >>> @attrs.frozen(kw_only=True) - ... class InnerBase(SerializableAttrs): - ... x: int - - >>> @attrs.frozen(kw_only=True) - ... class MiddleInner[T](InnerBase): - ... y: T | None - - >>> @attrs.frozen(kw_only=True) - ... class Inner(MiddleInner[str]): ... - - >>> @attrs.frozen(kw_only=True) - ... class OuterBase[X, T: InnerBase](SerializableAttrs): - ... inner: list[T] | None - - >>> class MiddleOuter[X](OuterBase[X, Inner]): ... - - >>> class Outer(MiddleOuter[float]): ... - - >>> outer = Outer(inner=[Inner(x=1, y='b')]) - >>> outer.to_json() - {'inner': [{'x': 1, 'y': 'b'}]} - - >>> Outer.from_json(outer.to_json()) - Outer(inner=[Inner(x=1, y='b')]) - - >>> Outer(inner=None).to_json() - {'inner': None} - - >>> Outer.from_json({'inner': None}) - Outer(inner=None) - - >>> Outer.from_json({'inner': [{'x': 'bad', 'y': 'b'}]}) - Traceback (most recent call last): - ... - ValueError: ('Invalid type of value', <class 'str'>, 'expecting', <class 'int'>) - - >>> Outer.from_json({'inner': [{'x': 1, 'y': None}]}) - Outer(inner=[Inner(x=1, y=None)]) - - A class with custom serialization (float serialized as string): - - >>> @attrs.frozen(kw_only=True) - ... class CustomBase(SerializableAttrs): - ... x: float - ... - ... def to_json(self) -> JSON: - ...
return super().to_json() | {'x': str(self.x)} - ... - ... @classmethod - ... def _from_json(cls, json: JSON) -> dict[str, Any]: - ... return dict(super()._from_json(json), x=float(json['x'])) - - >>> @attrs.frozen(kw_only=True) - ... class Custom(CustomBase): - ... y: str - - >>> Custom(x=1.23, y='y').to_json() - {'x': '1.23', 'y': 'y'} - - >>> Custom.from_json({'x': '1.23', 'y': 'y'}) - Custom(x=1.23, y='y') - - >>> @attrs.frozen(kw_only=True) - ... class Embedded(SerializableAttrs): - ... x: JSON - - >>> Embedded(x={'y': 12}).to_json() - {'x': {'y': 12}} - - >>> @attrs.frozen(kw_only=True) - ... class WithDicts(SerializableAttrs): - ... inners: dict[int, Inner] - - >>> WithDicts(inners={1: Inner(x=1, y='b')}).to_json() - {'inners': {1: {'x': 1, 'y': 'b'}}} - - >>> WithDicts.from_json({'inners': {1: {'x': 1, 'y': 'b'}}}) - WithDicts(inners={1: Inner(x=1, y='b')}) - """ - - @classmethod - @final - def from_json(cls, json: AnyJSON) -> Self: - cls._assert_concrete() - kwargs = cls._from_json(json_mapping(json)) - return cls(**kwargs) - - @classmethod - def _from_json(cls, json: JSON) -> dict[str, Any]: - """ - Return a dictionary with keyword arguments for the constructor. An - override must call the overridden method via super() but only needs to - populate keyword arguments for the fields defined by the class that - overrides the method. Typically, the overrides in subclasses will be - generated automatically but if a subclass explicitly defines an - override, it will be left alone. - """ - return {} - - def to_json(self) -> dict[str, AnyJSON]: - """ - Typically, the overrides in subclasses will be generated automatically - but if a subclass explicitly defines an override, it will be left alone. - """ - self._assert_concrete() - return {} - - @classmethod - def _assert_concrete(cls): - assert not cls._deferred_fields, R( - 'Class has fields of unknown type', cls._deferred_fields) - - def __init_subclass__(cls): - super().__init_subclass__() - try: - fields = attrs.fields(cls) - except attrs.exceptions.NotAnAttrsClassError: - pass - else: - cls._instrument(fields) - - @classmethod - def __attrs_init_subclass__(cls): - cls._instrument(attrs.fields(cls)) - - #: The names of fields that we weren't able to generate code for in this - #: class because at least one of them was annotated with a variable type. - #: Generic descendants that use free type variables in their attrs field - #: annotations override this attribute to a non-empty set. The - #: responsibility to handle deferred fields falls on the descendant that - #: binds the last remaining free type variable. - #: - _deferred_fields: frozenset[str] = frozenset() - - @classmethod - def _instrument(cls, fields: list[attrs.Attribute]): - """ - Add overrides for to_json and _from_json to the given class. The - overrides will handle the serialization and deserialization of the - fields defined by the class, not those that it inherits. An override - will only be added if the class doesn't already provide one. This method - must be idempotent because it may be invoked twice for the same class, - before and after the attrs decorator did its work. Even for slotted - classes this method will be invoked twice, albeit the second time on a - copy of the class. - """ - # When slots=True (the default for attrs.define), attrs makes a copy of - # the class so the subclass hook will be invoked twice, once for the - # original class, and again for the copy.
The copy is likely to have - # additional fields defined so we need to start from scratch and reset - # any left-overs that would interfere with that. - # - if cls._has_custom('to_json') and cls._has_custom('_from_json'): - pass - else: - if '_deferred_fields' in cls.__dict__: - del cls._deferred_fields - owned_fields = [ - field - for field in fields - if field.name in cls.__annotations__ or field.name in cls._deferred_fields - ] - if owned_fields: - deferred_fields = cls._make(owned_fields) - if deferred_fields != cls._deferred_fields: - cls._deferred_fields = deferred_fields - - @classmethod - def _make(cls, fields: list[attrs.Attribute]) -> frozenset[str]: - try: - _from_json = cls._make_from_json(fields) - except cls.Strategy.MustDefer: - deferred_fields = frozenset(field.name for field in fields) - else: - cls._define(_from_json) - deferred_fields = frozenset() - to_json = cls._make_to_json(fields) - cls._define(to_json) - return deferred_fields - - @classmethod - def _make_from_json(cls, fields: list[attrs.Attribute]) -> Callable: - globals = {cls.__name__: cls} - deserializers = (cls.Deserializer(cls, field, globals) for field in fields) - source = cls._indent([ - '@classmethod', - 'def _from_json(cls, json):', [ - f'kwargs = super({cls.__name__}, cls)._from_json(json)', - *flatten( - [ - f'x = json["{deserializer.field.name}"]', - *(deserializer.handle('x')), - f'kwargs["{deserializer.field.name}"] = x' - ] - for deserializer in deserializers - if deserializer.enabled - ), - 'return kwargs' - ] - ]) - return cls._compile(source, globals) - - @classmethod - def _make_to_json(cls, fields: list[attrs.Attribute]) -> Callable: - globals = {cls.__name__: cls} - serializers = (cls.Serializer(cls, field, globals) for field in fields) - to_json = cls._indent([ - 'def to_json(self):', [ - # Using the super() shortcut would require messing with the - # ``__closure__`` attribute of the function, and, we assume, - # would be slower. - f'json = super({cls.__name__}, self).to_json()', - *flatten( - [ - f'x = self.{serializer.field.name}', - f'json["{serializer.field.name}"] = ' + serializer.handle('x') - ] - for serializer in serializers - if serializer.enabled - ), - 'return json' - ] - ]) - return cls._compile(to_json, globals) - - @classmethod - def _indent(cls, source: Source, level=0): - """ - Indent and join the given list of source code items. An item can be - either a line, a tuple of words, or a nested list of items. The - indentation of lines is based on the nesting of the lists. Lines are - joined with a newline character, words are joined with a comma. - """ - return '\n'.join( - cls._indent(v, level + 1) - if isinstance(v, list) else - ' ' * level * 4 + (', '.join(v) if isinstance(v, tuple) else v) - for v in source - ) - - @classmethod - def _compile(cls, source: str, globals: dict[str, Any]): - """ - Compile a function definition from the given source & context - """ - if config.debug > 2: - log.debug('Generating code for method in %r with globals %r. 
' - 'See next line for body of method.\n%s', cls, globals, source) - bytecode = compile(source, cls.__module__, 'exec') - locals: dict[str, Any] = {} - eval(bytecode, globals, locals) - function = one(locals.values()) - return function - - _method_marker = '__azul_serializable__' - - @classmethod - def _has_custom(cls, method_name): - method = cls.__dict__.get(method_name) - return method is not None and not hasattr(method, cls._method_marker) - - @classmethod - def _define(cls, function: Callable) -> None: - """ - Add the given function as a method of the class to be instrumented - """ - method_name = function.__name__ - custom = cls._has_custom(method_name) - # We should never replace a custom definition. However, an - # instrumentation during attrs' subclass hook must replace - # the definition from the standard subclass hook. - if not custom: - setattr(function, cls._method_marker, None) - setattr(cls, method_name, function) - - @attrs.frozen - class Strategy[T](metaclass=ABCMeta): - cls: type['SerializableAttrs'] - field: attrs.Attribute - globals: dict[str, Any] - depth: Iterator[int] = attrs.field(factory=count) - - class MustDefer(Exception): - pass - - class Custom(TypedDict): - from_json: FromJSON | None - to_json: ToJSON | None - - @cached_property - def custom(self) -> Custom | None: - return self._metadata('custom', None) - - def _metadata[V](self, key: str, default: V) -> V: - try: - return self.field.metadata['azul'][key] - except KeyError: - return default - - @cached_property - def discriminator(self) -> str | None: - return self._metadata('discriminator', None) - - def handle(self, x: str) -> T: - if self.custom is None: - return self._handle(x, self._reify(self.field.type)) - else: - return self._custom(x) - - def _owner(self) -> type: - """ - Find the nearest ancestor that introduced the given field - """ - for base in self.cls.__mro__: - if self.field.name in base.__annotations__: - assert isinstance(base, type) - assert issubclass(base, SerializableAttrs) - return base - assert False - - def _reify(self, field_type: Any) -> Any: - """ - Resolve the type parameters of the given type, or raise - MustDefer if that's not possible. 
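- - For example (a sketch, reusing the classes from the ``SerializableAttrs`` doctest; ``T`` stands for the type variable of ``OuterBase``):: - - >>> derived_type_params(Outer, root=OuterBase)[T] # doctest: +SKIP - <class 'azul.attrs.Inner'> - - ``MiddleOuter`` binds ``T`` to ``Inner``, so a field of ``OuterBase`` annotated with ``T`` reifies to ``Inner`` when owned by ``Outer``.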
- """ - while isinstance(field_type, TypeVar): - owner = self._owner() - if owner is self.cls: - raise self.MustDefer - params = derived_type_params(self.cls, root=owner) - try: - field_type = params[field_type] - except KeyError: - raise self.MustDefer - return field_type - - embedded_json_types = ( - JSON, - CompositeJSON, - JSONArray, - MutableJSON, - MutableCompositeJSON, - MutableJSONArray - ) - - def _handle(self, x: str, field_type: Any): - if field_type in self.embedded_json_types: - return self._embedded_json(x, one(reify(field_type))) - elif isinstance(field_type, TypeAliasType): - field_type = field_type.__value__ - if isinstance(field_type, type): - if field_type in reify(PrimitiveJSON): - return self._primitive(x, field_type) - elif issubclass(field_type, Serializable): - cls_name = field_type.__name__ - self.globals[cls_name] = field_type - is_polymorphic = issubclass(field_type, PolymorphicSerializable) - has_discriminator = self.discriminator is not None - if is_polymorphic and has_discriminator: - return self._polymorphic(x, cls_name) - else: - return self._serializable(x, cls_name) - else: - origin = get_origin(field_type) - if origin in (Union, UnionType): - arg_types = set(get_args(field_type)) - arg_types.discard(type(None)) - if len(arg_types) == 1: - field_type = self._reify(one(arg_types)) - return self._optional(x, field_type) - elif issubclass(origin, list): - item_type = one(get_args(field_type)) - item_type = self._reify(item_type) - return self._list(x, item_type) - elif issubclass(origin, dict): - key_type, value_type = map(self._reify, get_args(field_type)) - return self._dict(x, key_type, value_type) - raise TypeError('Unserializable field', field_type, self.field) - - @property - @abstractmethod - def enabled(self) -> bool: - raise NotImplementedError - - @abstractmethod - def _primitive(self, x: str, field_type: type) -> T: - raise NotImplementedError - - @abstractmethod - def _embedded_json(self, x: str, field_type: type) -> T: - raise NotImplementedError - - @abstractmethod - def _optional(self, x: str, field_type: type) -> T: - raise NotImplementedError - - @abstractmethod - def _serializable(self, x: str, cls: str) -> T: - raise NotImplementedError - - @abstractmethod - def _polymorphic(self, x: str, base_cls: str) -> T: - raise NotImplementedError - - @abstractmethod - def _list(self, x: str, item_type: type) -> T: - raise NotImplementedError - - @abstractmethod - def _dict(self, x: str, key_type: type, value_type: type) -> T: - raise NotImplementedError - - @abstractmethod - def _custom(self, x: str) -> T: - raise NotImplementedError - - class Deserializer(Strategy[Source]): - - @property - def enabled(self) -> bool: - return self.custom is None or self.custom['from_json'] is not None - - def _optional(self, x: str, field_type: type) -> Source: - return [ - f'if {x} is not None:', self._handle(x, field_type) - ] - - def _serializable(self, x: str, cls: str) -> Source: - return [ - f'{x} = {cls}.from_json({x})' - ] - - def _polymorphic(self, x: str, base_cls: str) -> Source: - depth = next(self.depth) - cls = f'cls{depth}' - return [ - f'{cls} = {x}["{self.discriminator}"]', - f'{cls} = {base_cls}.cls_from_json({cls})', - f'{x} = {cls}.from_json({x})' - ] - - def _primitive(self, x: str, field_type: type) -> Source: - return [ - f'if not isinstance({x}, {field_type.__name__}):', [ - 'raise ValueError(', [( - '"Invalid type of value"', - f'type({x})', - '"expecting"', - field_type.__name__, - )], ')' - ] - ] - - def _embedded_json(self, x: str, 
field_type: type) -> Source: - self.globals[field_type.__name__] = field_type - return self._primitive(x, field_type) - - def _list(self, x: str, item_type: type) -> Source: - depth = next(self.depth) - l, v = f'l{depth}', f'v{depth}' - return [ - f'{l} = []', - f'for {v} in {x}:', [ - *self._handle(v, item_type), - f'{l}.append({v})' - ], - f'{x} = {l}' - ] - - def _dict(self, x: str, key_type: type, value_type: type) -> Source: - level = next(self.depth) - d, k, v = f'd{level}', f'k{level}', f'v{level}' - return [ - f'{d} = {{}}', - f'for {k},{v} in {x}.items():', [ - *self._handle(k, key_type), - *self._handle(v, value_type), - f'{d}[{k}] = {v}' - ], - f'{x} = {d}' - ] - - def _custom(self, x: str) -> Source: - var_name = self.field.name + '_from_json' - from_json = not_none(not_none(self.custom)['from_json']) - self.globals[var_name] = from_json - return [ - f'{x} = {var_name}({x})' - ] - - class Serializer(Strategy[str]): - - @property - def enabled(self) -> bool: - return self.custom is None or self.custom['to_json'] is not None - - def _primitive(self, x: str, field_type: type) -> str: - return x - - def _embedded_json(self, x: str, field_type: type) -> str: - return x - - def _optional(self, x: str, field_type: type) -> str: - return f'{x} if {x} is None else ({self._handle(x, field_type)})' - - def _serializable(self, x: str, cls: str) -> str: - return f'{x}.to_json()' - - def _polymorphic(self, x: str, base_cls: str) -> str: - return f'dict({x}.to_json(), {self.discriminator}={x}.cls_to_json())' - - def _list(self, x: str, item_type: type) -> str: - depth = next(self.depth) - v = f'v{depth}' - v_ = self._handle(v, item_type) - return f'[({v_}) for {v} in {x}]' - - def _dict(self, x: str, key_type: type, value_type: type) -> str: - level = next(self.depth) - k, v = f'k{level}', f'v{level}' - k_, v_ = self._handle(k, key_type), self._handle(v, value_type) - return f'{{{k_}: {v_} for {k}, {v} in {x}.items()}}' - - def _custom(self, x: str) -> str: - to_json = not_none(not_none(self.custom)['to_json']) - var_name = self.field.name + '_to_json' - self.globals[var_name] = to_json - return f'{var_name}({x})' - - -def serializable[T: attrs.Attribute](field: T | None = None, - *, - from_json: FromJSON, - to_json: ToJSON) -> T: - """ - Use the provided callables to (de)serialize values of the given field, - instead of generating them. - - >>> @attrs.frozen - ... class Foo(SerializableAttrs): - ... x: set[str] = serializable(to_json=sorted, from_json=set) - - >>> Foo(x={'b','a'}).to_json() - {'x': ['a', 'b']} - - >>> Foo.from_json({'x': ['a']}) - Foo(x={'a'}) - """ - custom = SerializableAttrs.Strategy.Custom(from_json=from_json, - to_json=to_json) - return _set_field_metadata(field, 'custom', custom) - - -def not_serializable[T: attrs.Attribute](field: T) -> T: - """ - Skip the given field during (de)serialization. The field should have a - default value or there should be some other provision for the constructor to - handle the case that no argument will be passed to it for any field that was - marked this way. - - >>> @attrs.frozen - ... class Foo(SerializableAttrs): - ...
x: int = not_serializable(attrs.field(default=42)) - - >>> Foo().to_json() - {} - - >>> Foo.from_json({}) - Foo(x=42) - """ - custom = SerializableAttrs.Strategy.Custom(from_json=None, - to_json=None) - return _set_field_metadata(field, 'custom', custom) - - -def _set_field_metadata[T: attrs.Attribute](field: T | None, key, value): - if field is None: - field = attrs.field() - metadata = field.metadata.setdefault('azul', {}) - metadata[key] = value - return field - - -def polymorphic[T: attrs.Attribute](field: T | None = None, - *, - discriminator: str - ) -> T: - """ - Mark an attrs field to use the given name for the discriminator property in - serialized instances of PolymorphicSerializable that occur in the value of - that field. The given discriminator property of a serialized instance - represents the type to use when deserializing that instance again. - - >>> from azul.json import RegisteredPolymorphicSerializable - - >>> class Inner(SerializableAttrs, RegisteredPolymorphicSerializable): - ... pass - - >>> @attrs.frozen - ... class InnerWithInt(Inner): - ... x: int - - >>> @attrs.frozen - ... class InnerWithStr(Inner): - ... y: str - - >>> @attrs.frozen(kw_only=True) - ... class Outer(SerializableAttrs): - ... inner: Inner = polymorphic(discriminator='type') - ... inners: list[Inner] = polymorphic(discriminator='_cls') - - >>> from azul.doctests import assert_json - - >>> outer = Outer(inner=InnerWithInt(42), - ... inners=[InnerWithStr('foo'), InnerWithInt(7)]) - >>> assert_json(outer.to_json()) - { - "inner": { - "x": 42, - "type": "InnerWithInt" - }, - "inners": [ - { - "y": "foo", - "_cls": "InnerWithStr" - }, - { - "x": 7, - "_cls": "InnerWithInt" - } - ] - } - >>> Outer.from_json(outer.to_json()) == outer - True - - In order to enable polymorphic serialization of the value of a given field, - the discriminator property needs to be specified explicitly, otherwise the - serialization framework will resort to the static type of the field. - - >>> @attrs.frozen - ... class GenericOuter[T: Inner](SerializableAttrs): - ... inner: T - - >>> class StaticOuter(GenericOuter[InnerWithInt]): - ... pass - - >>> outer = StaticOuter(InnerWithInt(42)) - >>> outer.to_json() - {'inner': {'x': 42}} - - Despite the fact that ``{'x': 42}`` does not encode any type information, - ``from_json`` can tell from the static type of the field that {'x': 42} - should be deserialized as an ``InnerWithInt``. - - >>> StaticOuter.from_json(outer.to_json()).inner - InnerWithInt(x=42) - - >>> StaticOuter.from_json(outer.to_json()) == outer - True - - However, when the static type of the field is not concrete, deserialization - may fail or, like in this case, lose information by creating an instance of - the parent class instead of the class that was serialized. - - >>> @attrs.frozen - ... class AbstractOuter(SerializableAttrs): - ... 
inner: Inner - - >>> outer = AbstractOuter(InnerWithInt(42)) - >>> AbstractOuter.from_json(outer.to_json()).inner # doctest: +ELLIPSIS - <azul.attrs.Inner object at 0x...> - """ - return _set_field_metadata(field, 'discriminator', discriminator) diff --git a/src/azul/auth.py b/src/azul/auth.py deleted file mode 100644 index de7b61a132..0000000000 --- a/src/azul/auth.py +++ /dev/null @@ -1,103 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from inspect import ( - isabstract, -) -from typing import ( - ClassVar, -) - -import attr - -from azul.json import ( - copy_json, -) -from azul.types import ( - JSON, -) - - -@attr.s(auto_attribs=True, frozen=True) -class Authentication(metaclass=ABCMeta): - - @abstractmethod - def identity(self) -> str: - """ - A string uniquely identifying the authenticated entity, for at least - some period of time. - """ - raise NotImplementedError - - @abstractmethod - def as_http_header(self) -> str: - """ - A string representing the authenticated entity as an HTTP header - name/value pair. Raises NotImplementedError if the authentication format - does not support such a representation. - """ - raise NotImplementedError - - _cls_field: ClassVar[str] = '_cls' - - def to_json(self) -> JSON: - """ - >>> @attr.s(auto_attribs=True, frozen=True) - ... class Foo(Authentication): - ... foo: str - ... def identity(self) -> str: - ... # noinspection PyUnresolvedReferences - ... return self.foo - ... def as_http_header(self) -> str: - ... raise NotImplementedError - >>> f = Foo('bar') - >>> f - Foo(foo='bar') - >>> f.to_json() - {'foo': 'bar', '_cls': 'Foo'} - >>> Authentication.from_json(f.to_json()) - Foo(foo='bar') - """ - json = attr.asdict(self) - json[self._cls_field] = type(self).__name__ - return json - - @classmethod - def from_json(cls, json: JSON) -> 'Authentication': - json = copy_json(json) - cls_name = json.pop(cls._cls_field) - assert isinstance(cls_name, str) - return cls._cls_for_name[cls_name](**json) - - _cls_for_name: ClassVar[dict[str, type['Authentication']]] = {} - - def __init_subclass__(cls) -> None: - super().__init_subclass__() - if not isabstract(cls): - name = cls.__name__ - assert name not in cls._cls_for_name, cls - assert cls._cls_field not in attr.fields_dict(cls), cls - cls._cls_for_name[name] = cls - - -@attr.s(auto_attribs=True, frozen=True) -class OAuth2(Authentication): - access_token: str - - def identity(self) -> str: - return self.access_token - - def as_http_header(self) -> str: - return f'Authorization: Bearer {self.access_token}' - - -@attr.s(auto_attribs=True, frozen=True) -class HMACAuthentication(Authentication): - key_id: str - - def identity(self) -> str: - return self.key_id - - def as_http_header(self) -> str: - raise NotImplementedError diff --git a/src/azul/azulclient.py b/src/azul/azulclient.py deleted file mode 100644 index 04a9a8e25a..0000000000 --- a/src/azul/azulclient.py +++ /dev/null @@ -1,435 +0,0 @@ -from collections import ( - defaultdict, -) -from collections.abc import ( - Iterable, -) -from concurrent.futures import ( - Future, - ThreadPoolExecutor, -) -from enum import ( - auto, -) -import fnmatch -from functools import ( - partial, -) -import logging -from pprint import ( - PrettyPrinter, -) -from typing import ( - AbstractSet, - cast, -) -import uuid - -import attrs -import requests -from urllib3 import ( - HTTPResponse, -) -from urllib3.exceptions import ( - HTTPError, -) - -from azul import ( - CatalogName, - R, - cached_property, - config, -) -from azul.es import ( - ESClientFactory, -)
-from azul.hmac import ( - SignatureHelper, -) -from azul.http import ( - HasCachedHttpClient, -) -from azul.indexer import ( - SourceRef, - SourceSpec, -) -from azul.indexer.index_queue_service import ( - IndexQueueService, -) -from azul.indexer.index_repository_service import ( - IndexRepositoryService, -) -from azul.indexer.index_service import ( - IndexService, -) -from azul.plugins import ( - MetadataPlugin, - RepositoryPlugin, -) -from azul.queues import ( - Action, - Queues, - SQSFifoMessage, - SQSMessage, -) -from azul.types import ( - JSON, - JSONs, -) - -log = logging.getLogger(__name__) - - -class MirrorAction(Action): - mirror_source = auto() - mirror_partition = auto() - mirror_file = auto() - mirror_part = auto() - finalize_file = auto() - - -@attrs.frozen(kw_only=True) -class AzulClient(SignatureHelper, HasCachedHttpClient): - num_workers: int = 16 - - @cached_property - def queues(self) -> Queues: - return Queues() - - @cached_property - def index_service(self) -> IndexService: - return IndexService() - - @cached_property - def index_queue_service(self) -> IndexQueueService: - return IndexQueueService() - - @cached_property - def index_repository_service(self) -> IndexRepositoryService: - return IndexRepositoryService() - - def repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin: - return self.index_repository_service.repository_plugin(catalog) - - def metadata_plugin(self, catalog: CatalogName) -> MetadataPlugin: - return self.index_service.metadata_plugin(catalog) - - def mirror_source_message(self, - catalog: CatalogName, - source: SourceRef - ) -> SQSFifoMessage: - return SQSFifoMessage( - body={ - 'action': MirrorAction.mirror_source.to_json(), - 'catalog': catalog, - 'source': cast(JSON, source.to_json()), - }, - group_id=source.id - ) - - def local_reindex(self, catalog: CatalogName, prefix: str) -> int: - service = self.index_repository_service - plugin = self.repository_plugin(catalog) - notifications: JSONs = [ - # Notifications sent organically by DSS had a different structure, - # but since DSS is long gone these synthetic notifications are now - # the only variant that would ever occur in the wild. - { - 'transaction_id': str(uuid.uuid4()), - 'bundle_fqid': bundle_fqid.to_json() - } - for source in map(plugin.resolve_source, config.sources(catalog)) - for bundle_fqid in service.list_bundles(catalog, source, prefix) - ] - self.index(catalog, notifications) - return len(notifications) - - def index(self, - catalog: CatalogName, - notifications: Iterable[JSON], - delete: bool = False - ): - errors = defaultdict[int, int](int) - missing = [] - indexed = 0 - total = 0 - path = (catalog, 'delete' if delete else 'add') - indexer_url = config.indexer_endpoint.set(path=path) - - def attempt(notification: JSON, - i: int - ) -> tuple[JSON, None | Future | HTTPResponse | HTTPError]: - log_args = (indexer_url, notification, i) - log.info('Notifying %s about %s, attempt %i.', - *log_args) - # We want to send the request with urllib3 directly but HMAC - # signing is only available for Requests, so we need to prepare a - # request, sign it and then unpack it again before calling urllib3. 
- request = requests.Request('POST', str(indexer_url), json=notification) - request = request.prepare() - self.sign(request) - try: - result = self._http_client.request(url=request.url, - method=request.method, - headers=request.headers, - body=request.body) - except HTTPError as e: - result = e - - if isinstance(result, HTTPResponse) and result.status == 202: - log.info('Success notifying %s about %s, attempt %i.', - *log_args) - return notification, None - else: - assert isinstance(result, (HTTPResponse, HTTPError)), result - if i < 3: - log.warning('Retrying to notify %s about %s, attempt %i, after error %s.', - *log_args, result) - return notification, tpe.submit(partial(attempt, notification, i + 1)) - else: - log.warning('Failed to notify %s about %s, attempt %i: after error %s.', - *log_args, result) - return notification, result - - def handle_future(future: Future) -> None: - nonlocal indexed - bundle_fqid, result = future.result() - if result is None: - indexed += 1 - elif isinstance(result, HTTPResponse): - errors[result.status] += 1 - missing.append((notification, result.status)) - elif isinstance(result, Future): - # The task scheduled a follow-on task, presumably a retry. - # Follow that new task. - handle_future(result) - else: - assert False - - with ThreadPoolExecutor(max_workers=self.num_workers, - thread_name_prefix='pool') as tpe: - futures = [] - for notification in notifications: - total += 1 - futures.append(tpe.submit(partial(attempt, notification, 0))) - for future in futures: - handle_future(future) - - printer = PrettyPrinter(compact=False) - log.info('Sent notifications for %i of %i bundles for catalog %r.', - indexed, total, catalog) - if errors: - log.error('Number of errors by HTTP status code:\n%s', - printer.pformat(dict(errors))) - if missing: - log.error('Unsent notifications and their HTTP status code:\n%s', - printer.pformat(missing)) - if errors or missing: - raise AzulClientNotificationError - - def matching_sources(self, - catalogs: Iterable[CatalogName], - globs: AbstractSet[str] = frozenset('*') - ) -> dict[CatalogName, set[SourceSpec]]: - result = {} - matched_globs = set() - for catalog in catalogs: - raw_specs = config.sources(catalog) - specs = set(self.repository_plugin(catalog).sources) - if '*' not in globs: - matching_raw_specs: set[str] = set() - for glob in globs: - _matching_raw_specs = fnmatch.filter(raw_specs, glob) - if _matching_raw_specs: - matching_raw_specs.update(_matching_raw_specs) - matched_globs.add(glob) - log.debug('Source glob %r matched sources %r in catalog %r', - glob, _matching_raw_specs, catalog) - specs = {spec for spec in specs if str(spec) in matching_raw_specs} - result[catalog] = specs - unmatched_globs = globs - matched_globs - if unmatched_globs: - log.warning('Source(s) not found in any catalog: %r', unmatched_globs) - assert any(result.values()), R( - 'No valid sources specified for any catalog') - return result - - def mirror_queue(self): - name = config.mirror_queue.name - return aws.sqs_queue(name) - - def queue_mirror_messages(self, messages: Iterable[SQSMessage]) -> int: - return self.queues.send_messages(self.mirror_queue(), messages) - - def delete_all_indices(self, catalog: CatalogName): - self.index_service.delete_indices(catalog) - - def create_all_indices(self, catalog: CatalogName): - self.index_service.create_indices(catalog) - - def delete_bundle(self, catalog: CatalogName, bundle_uuid, bundle_version): - log.info('Deleting bundle %r, version %r in catalog %r.', - bundle_uuid, bundle_version, 
catalog) - notifications = [ - { - # FIXME: delete_bundle script fails with KeyError: 'source' - # https://github.com/DataBiosphere/azul/issues/5105 - 'bundle_fqid': { - 'uuid': bundle_uuid, - 'version': bundle_version - } - } - ] - self.index(catalog, notifications, delete=True) - - def deindex(self, catalog: CatalogName, sources: Iterable[SourceSpec]): - plugin = self.repository_plugin(catalog) - source_ids = [plugin.resolve_source(s).id for s in sources] - es_client = ESClientFactory.get() - indices = ','.join(map(str, self.index_service.index_names(catalog))) - query = { - 'query': { - 'bool': { - 'should': [ - { - 'terms': { - # Aggregate documents - 'sources.id.keyword': source_ids - } - }, - { - 'terms': { - # Contribution documents - 'source.id.keyword': source_ids - } - } - ] - } - } - } - log.info('Deindexing sources %r from catalog %r', sources, catalog) - log.debug('Using query: %r', query) - response = es_client.delete_by_query(index=indices, body=query, slices='auto') - if len(response['failures']) > 0: - if response['version_conflicts'] > 0: - log.error('Version conflicts encountered. Do not deindex while ' - 'indexing is occurring. The index may now be in an ' - 'inconsistent state.') - raise RuntimeError('Failures during deletion', response['failures']) - - def reset_indexer(self, - catalogs: Iterable[CatalogName], - *, - purge_queues: bool, - delete_indices: bool, - create_indices: bool): - """ - Reset the indexer, to a degree. - - :param catalogs: The catalogs to create and delete indices for. - - :param purge_queues: whether to purge the indexer queues at the - beginning. Note that purging the queues affects - all catalogs, not just the specified one. - - :param delete_indices: whether to delete the indexes before optionally - recreating them - - :param create_indices: whether to create the indexes at the end. - """ - indexer_queues = self.queues.get_queues(config.indexer_work_queue_names) - if purge_queues: - log.info('Disabling lambdas ...') - self.queues.manage_lambdas(indexer_queues, enable=False) - log.info('Purging queues: %s', ', '.join(indexer_queues.keys())) - self.queues.purge_queues_unsafely(indexer_queues) - if delete_indices: - log.info('Deleting indices ...') - for catalog in catalogs: - self.delete_all_indices(catalog) - if purge_queues: - log.info('Re-enabling lambdas ...') - self.queues.manage_lambdas(indexer_queues, enable=True) - if create_indices: - log.info('Creating indices ...') - for catalog in catalogs: - self.create_all_indices(catalog) - - def wait_for_indexer(self): - """ - Wait for indexer to begin processing notifications, then wait for work - to finish. - """ - # Indexing can still succeed after a transient stall. A stall's - # transience cannot be proven until all lambdas and their respective - # retries repeatedly time out, but this would result in an unreasonably - # long wait time. Waiting for just one retry is sufficient to - # accommodate the most probable scenarios for transient stalls. 
- timeout = max(config.contribution_lambda_timeout(retry=True), - config.aggregation_lambda_timeout(retry=True)) - self.queues.wait_to_stabilize(config.indexer_work_queue_names, - timeout, - detect_stall=True) - - def wait_for_mirroring(self): - self.queues.wait_to_stabilize(config.mirror_work_queue_names, - config.mirror_lambda_timeout, - detect_stall=False) - - def is_queue_empty(self, queue_name: str) -> bool: - queues = self.queues.get_queues([queue_name]) - length, _ = self.queues.get_queue_lengths(queues) - return length == 0 - - def remote_mirror(self, catalog: CatalogName, sources: Iterable[SourceRef]): - - def message(source: SourceRef): - log.info('Mirroring files in source %r from catalog %r', - str(source.spec), catalog) - return self.mirror_source_message(catalog, source) - - messages = map(message, sources) - self.queue_mirror_messages(messages) - - def _get_non_empty_fail_queues(self) -> set[str]: - return { - queue - for queue in config.indexer_fail_queue_names - if not self.is_queue_empty(queue) - } - - _common_fail_queue_msg = ( - "If needed, empty the work queues via 'manage_queues.py purge_indexer'. " - "Then run 'manage_queues.py dump --delete' for each fail queue listed: " - ) - - def require_no_failures_before(self): - queues = self._get_non_empty_fail_queues() - assert 0 == len(queues), R( - 'Cannot begin indexing because a previous operation failed: ' - 'At least one fail queue is not empty. ' + - self._common_fail_queue_msg, - queues - ) - - def require_no_failures_after(self): - queues = self._get_non_empty_fail_queues() - assert 0 == len(queues), R( - 'At least one fail queue is not empty, indicating that there were ' - 'persistent indexer failures. ' + - self._common_fail_queue_msg, - queues - ) - - -class AzulClientError(RuntimeError): - pass - - -class AzulClientNotificationError(AzulClientError): - - def __init__(self) -> None: - super().__init__('Some notifications could not be sent') diff --git a/src/azul/bigquery.py b/src/azul/bigquery.py deleted file mode 100644 index c95179e6f8..0000000000 --- a/src/azul/bigquery.py +++ /dev/null @@ -1,50 +0,0 @@ -from collections.abc import ( - Iterable, - Mapping, -) -from datetime import ( - datetime, -) -import re -from typing import ( - Union, -) - -from azul.strings import ( - back_quote as bq, -) - -BigQueryValue = Union[int, float, bool, str, bytes, datetime, None] -BigQueryRow = Mapping[str, BigQueryValue] -BigQueryRows = Iterable[BigQueryRow] - -identifier_re = r'([a-zA-Z_][a-zA-Z_0-9]*)' -table_name_re = re.compile(fr'{identifier_re}(\.{identifier_re})*') - - -def backtick(table_name: str) -> str: - """ - Return the given string surrounded by backticks if deemed necessary based - on a simplified interpretation of BigQuery's lexical structure and syntax - for identifier tokens. - - https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical - - >>> backtick('foo.bar.my_table') - 'foo.bar.my_table' - - >>> backtick('foo2.bar.my_table') - 'foo2.bar.my_table' - - >>> backtick('foo-2.bar.my_table') - '`foo-2.bar.my_table`' - - >>> backtick('foo-2.bar`s.my_table') - Traceback (most recent call last): - ... 
- azul.RequirementError: ('`', 'must not occur in', 'foo-2.bar`s.my_table') - """ - if table_name_re.fullmatch(table_name): - return table_name - else: - return bq(table_name) diff --git a/src/azul/bigquery_reservation.py b/src/azul/bigquery_reservation.py deleted file mode 100644 index 0aa5645724..0000000000 --- a/src/azul/bigquery_reservation.py +++ /dev/null @@ -1,238 +0,0 @@ -import logging - -from google.cloud.bigquery_reservation_v1 import ( - Assignment, - CapacityCommitment, - Edition, - Reservation, - ReservationServiceClient, -) -from google.cloud.bigquery_reservation_v1.services.reservation_service.pagers import ( - ListAssignmentsPager, - ListCapacityCommitmentsPager, - ListReservationsPager, -) -from google.oauth2.service_account import ( - Credentials, -) -from more_itertools import ( - one, -) - -from azul import ( - cached_property, - config, - require, -) -from azul.deployment import ( - aws, -) - -log = logging.getLogger(__name__) - -ResourcePager = ( - ListCapacityCommitmentsPager | - ListReservationsPager | - ListAssignmentsPager -) - -Resource = CapacityCommitment | Reservation | Assignment - - -class BigQueryReservation: - _reservation_id = 'default' - - _rest_api_url = 'https://content-bigqueryreservation.googleapis.com/v1/' - - _http_scopes = ['https://www.googleapis.com/auth/bigquery'] - - _path_suffixes = { - 'capacity_commitment': '', - 'reservation': '', - 'assignment': '/reservations/-' - } - - reservation: Reservation | None - assignment: Assignment | None - location: str - - def __init__(self, - *, - location: str = config.tdr_source_location, - slots: int = config.bigquery_reserved_slots, - dry_run: bool = False): - """ - :param dry_run: If true, methods will not create/update/destroy any - cloud resources. - """ - self.location = location - self.slots = slots - self.dry_run = dry_run - self.refresh() - - def refresh(self): - for resource_type, path_suffix in self._path_suffixes.items(): - self._refresh(resource_type) - - def _refresh(self, resource_type): - pager_method = getattr(self._client, f'list_{resource_type}s') - path_suffix = self._path_suffixes[resource_type] - pager = pager_method(parent=self._reservation_parent_path + path_suffix) - setattr(self, f'{resource_type}', self._single_resource(pager)) - - @cached_property - def credentials(self) -> Credentials: - with aws.service_account_credentials(config.ServiceAccount.indexer) as file_name: - credentials = Credentials.from_service_account_file(file_name) - return credentials.with_scopes(self._http_scopes) - - @cached_property - def _client(self) -> ReservationServiceClient: - return ReservationServiceClient(credentials=self.credentials) - - @property - def _project(self) -> str: - return self.credentials.project_id - - @property - def _reservation_parent_path(self) -> str: - return self._client.common_location_path(project=self._project, - location=self.location) - - @property - def is_active(self) -> bool | None: - resource_statuses = { - self.reservation is not None, - self.assignment is not None - } - try: - return one(resource_statuses) - except ValueError: - return None - - @property - def update_time(self) -> float | None: - """ - The time at which the current Reservation was updated as a Unix - timestamp, or None if there is no Reservation.
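- - For example, to compute the age of the current Reservation in seconds (a sketch; assumes a Reservation exists):: - - >>> import time - >>> age = time.time() - reservation.update_time # doctest: +SKIP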
- """ - if self.reservation is None: - return None - else: - return self.reservation.update_time.timestamp() - - def activate(self) -> None: - self._create_reservation() - self._assign_slots() - self.refresh() - if not self.dry_run: - assert self.reservation is not None - if not self.is_active: - raise RuntimeError('Failed to activate slots') - if self.reservation.autoscale.max_slots < self.slots: - raise RuntimeError('Failed to acquire enough slots', - self.reservation.autoscale.max_slots, - self.slots) - - def _create_reservation(self) -> None: - """ - Idempotently create reservation. - """ - self._refresh('reservation') - if self.reservation is None: - reservation = Reservation(dict(edition=Edition.STANDARD, - autoscale=Reservation.Autoscale(dict(max_slots=self.slots)), - ignore_idle_slots=True)) - if self.dry_run: - log.info('Would reserve %d BigQuery slots in location %r, reservation ID: %r', - reservation.autoscale.max_slots, self.location, self._reservation_id) - else: - log.info('Reserving %d BigQuery slots in location %r, reservation ID: %r', - reservation.autoscale.max_slots, self.location, self._reservation_id) - reservation = self._client.create_reservation(reservation=reservation, - reservation_id=self._reservation_id, - parent=self._reservation_parent_path) - log.info('Reserved %d BigQuery slots in location %r, reservation name: %r', - reservation.autoscale.max_slots, self.location, reservation.name) - self.reservation = reservation - else: - current_capacity = self.reservation.autoscale.max_slots - log.info('Reservation with capacity %d already created in location %r', - current_capacity, self.location) - if current_capacity < self.slots: - log.info('Capacity deficit is %d', self.slots - current_capacity) - if self.dry_run: - log.info('Would increase reservation capacity to %d', self.slots) - else: - assert self.reservation is not None - log.info('Increasing reservation capacity to %d', self.slots) - self.reservation.autoscale.max_slots = self.slots - reservation = self._client.update_reservation( - reservation=self.reservation, - update_mask='autoscale' - ) - log.info('Reservation now has capacity %d', reservation.autoscale.max_slots) - self.reservation = reservation - - def _assign_slots(self) -> None: - """ - Idempotently assign capacity commitment to a reservation. - """ - self._refresh('assignment') - if self.assignment is not None: - log.info('Slots already assigned in location %r', - self.location) - else: - assignment = Assignment(dict(assignee=f'projects/{self._project}', - job_type=Assignment.JobType.QUERY)) - if self.dry_run: - reservation_name = None if self.reservation is None else self.reservation.name - log.info('Would assign slots to reservation %r in location %r', - reservation_name, self.location) - else: - # FIXME: Mutability of BigQueryReservation confuses type checker - # https://github.com/DataBiosphere/azul/issues/6834 - assert self.reservation is not None - log.info('Assigning slots to reservation %r in location %r', - self.reservation.name, self.location) - assignment = self._client.create_assignment(parent=self.reservation.name, - assignment=assignment) - log.info('Assigned slots in location %r, assignment name: %r', - self.location, assignment.name) - self.assignment = assignment - - def deactivate(self) -> None: - """ - Idempotently delete all resources. 
- """ - for resource_type in ('assignment', 'reservation', 'capacity_commitment'): - resource = getattr(self, resource_type) - if resource is None: - log.info('%r does not exist in location %r', - resource_type, self.location) - else: - resource_str = f'{resource_type}:{resource.name}' - if self.dry_run: - log.info('Would delete resource %r in location %r', - resource_str, self.location) - else: - delete_method = getattr(self._client, 'delete_' + resource_type) - delete_method(name=resource.name) - log.info('Deleted resource %r in location %r', - resource_str, self.location) - self.refresh() - # self.is_active is None when some, but not all resources are present - if not self.dry_run and self.is_active is not False: - raise RuntimeError(f'Failed to delete slots in location {self.location!r}') - - def _single_resource(self, resources: ResourcePager) -> Resource | None: - resources: list[Resource] = list(resources) - try: - resource, *extras = resources - except ValueError: - return None - else: - require(not extras, - 'Too many resources in path (should be 0 or 1)', - self._reservation_parent_path, resources) - return resource diff --git a/src/azul/bytes.py b/src/azul/bytes.py deleted file mode 100644 index 91f9a0b6fb..0000000000 --- a/src/azul/bytes.py +++ /dev/null @@ -1,114 +0,0 @@ -import base64 - -_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') -_urlsafe_decode_translation = bytes.maketrans(b'-_+/', b'+/\0\0') - - -def azul_urlsafe_b64encode(x: bytes) -> str: - r""" - Same as base64.urlsafe_b64encode but removes padding and returns a string. - - >>> azul_urlsafe_b64encode(b'') - '' - - >>> azul_urlsafe_b64encode(b'\x00') - 'AA' - >>> base64.urlsafe_b64encode(b'\x00') - b'AA==' - - >>> azul_urlsafe_b64encode(b'\x00\x01') - 'AAE' - >>> base64.urlsafe_b64encode(b'\x00\x01') - b'AAE=' - - """ - x = base64.b64encode(x).rstrip(b'=').translate(_urlsafe_encode_translation) - return x.decode() - - -def azul_urlsafe_b64decode(s: str) -> bytes: - r""" - Same as base64.urlsafe_b64decode but also works with inputs from which - padding was removed *and* rejects inputs with characters not part of the - base64 alphabet. It's also stricter in rejecting the URL-unsafe alt - characters plus and slash. - - >>> azul_urlsafe_b64decode('') - b'' - - >>> azul_urlsafe_b64decode('AQ') - b'\x01' - >>> azul_urlsafe_b64decode('AQ==') - b'\x01' - - >>> azul_urlsafe_b64decode('AQI') - b'\x01\x02' - >>> azul_urlsafe_b64decode('AQI=') - b'\x01\x02' - - >>> azul_urlsafe_b64decode('AQI==') - Traceback (most recent call last): - ... - binascii.Error: Excess data after padding - - >>> azul_urlsafe_b64decode('-_') - b'\xfb' - - An invalid characters is rejected. - - >>> azul_urlsafe_b64decode('AQ$') - Traceback (most recent call last): - ... - binascii.Error: Only base64 data is allowed - - Same for the builtin, but for unintuitive reason. - - >>> base64.urlsafe_b64decode('AQ$') - Traceback (most recent call last): - ... - binascii.Error: Incorrect padding - - The same happens with padding. - - >>> azul_urlsafe_b64decode('AQ$=') - Traceback (most recent call last): - ... - binascii.Error: Only base64 data is allowed - - >>> base64.urlsafe_b64decode('AQ$=') - Traceback (most recent call last): - ... - binascii.Error: Incorrect padding - - With just the right amount of padding, however, the builtin can be coaxed - into ignoring the invalid character … - - >>> base64.urlsafe_b64decode('AQ$==') - b'\x01' - - … whereas this function cannot. 
- - >>> azul_urlsafe_b64decode('AQ$==') - Traceback (most recent call last): - ... - binascii.Error: Only base64 data is allowed - - Also, somewhat surprisingly, base64.urlsafe_b64decode allows plus and slash - in addition to dash and underscore. - - >>> base64.urlsafe_b64decode('+/==') - b'\xfb' - - This function doesn't. - - >>> azul_urlsafe_b64decode('+/') - Traceback (most recent call last): - ... - binascii.Error: Only base64 data is allowed - """ - # We could pass `altchars=` to b64decode() but that would invoke - # bytes.maketrans() on every invocation. Using a static translation table is - # slightly faster. That's the same approach base64.urlsafe_b64decode uses. - s = s.translate(_urlsafe_decode_translation) - s += '=='[:3 - ((len(s) + 3) % 4)] - return base64.b64decode(s, validate=True) diff --git a/src/azul/chalice.py b/src/azul/chalice.py deleted file mode 100644 index cd18d85279..0000000000 --- a/src/azul/chalice.py +++ /dev/null @@ -1,885 +0,0 @@ -from abc import ( - ABCMeta, -) -from collections.abc import ( - Iterable, -) -from enum import ( - Enum, -) -import json -import logging -import mimetypes -import os -import pathlib -from typing import ( - Any, - Callable, - Iterator, - Literal, - Mapping, - Self, - Sequence, -) -from urllib.parse import ( - unquote, -) - -import attrs -import chalice -from chalice import ( - Chalice, - ChaliceViewError, -) -from chalice.app import ( - BadRequestError, - CaseInsensitiveMapping, - HeadersType, - MultiDict, - NotFoundError, - Request, - Response, -) -import chevron -from furl import ( - furl, -) - -from azul import ( - config, - mutable_furl, - open_resource, - reject, - require, -) -from azul.auth import ( - Authentication, -) -from azul.collections import ( - deep_dict_merge, -) -from azul.csp import ( - CSP, -) -from azul.enums import ( - auto, -) -from azul.json import ( - copy_json, -) -from azul.logging import ( - http_body_log_message, -) -from azul.modules import ( - module_loaded_dynamically, -) -from azul.openapi import ( - format_description, - params, - responses, - schema, -) -from azul.strings import ( - join_words as jw, -) -from azul.types import ( - JSON, - LambdaContext, - MutableJSON, - json_dict, - json_list, - json_str, - not_none, -) - -log = logging.getLogger(__name__) - - -class AzulRequest(Request): - """ - Use only for type hints. The actual requests will be instances of the parent - class, but they will have the attributes defined here. 
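- - For example, a view function might branch on the ``authentication`` attribute (a sketch):: - - request = app.current_request - if request.authentication is None: - ... # handle the anonymous case - else: - log.info('Authenticated as %s', request.authentication.identity())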
- """ - authentication: Authentication | None - - -# For some reason Chalice does not define an exception for the 410 status code -class GoneError(ChaliceViewError): - STATUS_CODE = 410 - - -# Chalice does not define any exceptions for 5xx status codes besides 500 -class BadGatewayError(ChaliceViewError): - STATUS_CODE = 502 - - -class ServiceUnavailableError(ChaliceViewError): - STATUS_CODE = 503 - - -class LambdaMetric(Enum): - """ - For the full list of supported metrics in the `AWS/Lambda` namespace, see: - https://docs.aws.amazon.com/lambda/latest/dg/monitoring-metrics.html - """ - errors = auto() - throttles = auto() - - @property - def aws_name(self) -> str: - return self.name.capitalize() - - -class AzulChaliceApp(Chalice): - lambda_context: LambdaContext | None - current_request: AzulRequest | None - - def __init__(self, - app_name: str, - globals: Mapping[str, Any], - *, - spec: JSON): - self._patch_event_source_handler() - app_module_path = globals['__file__'] - require(app_module_path.endswith('/app.py'), app_module_path) - self.app_module_path = app_module_path - self.loaded_dynamically = module_loaded_dynamically(globals) - self.non_interactive_routes: set[tuple[str, str]] = set() - reject('paths' in spec, 'The top-level spec must not define paths') - self._specs = self._add_contact_to_spec(spec) - self._specs['paths'] = {} - # The `debug` arg controls whether tracebacks appear in error responses - super().__init__(app_name, debug=config.debug > 1, configure_logs=False) - # Middleware is invoked in order of registration - self.register_middleware(self._logging_middleware, 'http') - self.register_middleware(self._security_headers_middleware, 'http') - self.register_middleware(self._api_gateway_context_middleware, 'http') - self.register_middleware(self._authentication_middleware, 'http') - - def _add_contact_to_spec(self, spec: JSON) -> MutableJSON: - spec = copy_json(spec) - info = json_dict(spec.setdefault('info', {})) - info['description'] = json_str(info.get('description', '')) + config.contact_us - return spec - - @property - def unqualified_app_name(self): - result, _ = config.unqualified_resource_name(self.app_name) - return result - - def __call__(self, event: dict, context: LambdaContext) -> dict[str, Any]: - # Chalice does not URL-decode path parameters - # (https://github.com/aws/chalice/issues/511) - # This appears to actually be a bug in API Gateway, as the parameters - # are already parsed when the event is passed to Chalice - # (https://docs.aws.amazon.com/lambda/latest/dg/services-apigateway.html#apigateway-example-event) - path_params = event['pathParameters'] - if path_params is not None: - for key, value in path_params.items(): - path_params[key] = unquote(value) - return super().__call__(event, context) - - def _patch_event_source_handler(self): - """ - Work around https://github.com/aws/chalice/issues/856. That issue has - been fixed for a while now but in a way that doesn't help us: it makes - the context available in each event object whereas we need the context - in the application object. 
- """ - import chalice.app - - def patched_event_source_handler(self_, event, context): - self.lambda_context = context - return old_handler(self_, event, context) - - old_handler = chalice.app.EventSourceHandler.__call__ - if old_handler.__code__ != patched_event_source_handler.__code__: - setattr(chalice.app.EventSourceHandler, - '__call__', - patched_event_source_handler) - - def _logging_middleware(self, event, get_response): - self._log_request(not_none(self.current_request)) - response = get_response(event) - self._log_response(response) - return response - - def _authentication_middleware(self, event, get_response): - try: - self.__authenticate() - except ChaliceViewError as e: - response = Response(body={'Code': type(e).__name__, 'Message': str(e)}, - status_code=e.STATUS_CODE) - else: - response = get_response(event) - return response - - def _api_gateway_context_middleware(self, event, get_response): - config.lambda_is_handling_api_gateway_request = True - try: - return get_response(event) - finally: - config.lambda_is_handling_api_gateway_request = False - - @classmethod - def security_headers(cls) -> dict[str, str]: - """ - Default values for headers added to every response from the app, as well - as canned 4XX and 5XX responses from API Gateway. Use of these headers - addresses known security vulnerabilities. - """ - hsts_max_age = 60 * 60 * 24 * 365 * 2 - csp = CSP.for_azul() - return { - 'Content-Security-Policy': str(csp), - 'Referrer-Policy': 'strict-origin-when-cross-origin', - 'Strict-Transport-Security': jw(f'max-age={hsts_max_age};', - 'includeSubDomains;', - 'preload'), - 'X-Content-Type-Options': 'nosniff', - 'X-Frame-Options': 'DENY', - 'X-XSS-Protection': '1; mode=block' - } - - def _security_headers_middleware(self, event, get_response): - """ - Add headers to the response - """ - response = get_response(event) - # Add security headers to the response without overwriting any headers - # that might have been added already (e.g. Content-Security-Policy) - for k, v in self.security_headers().items(): - response.headers.setdefault(k, v) - view_function = self.routes[event.path][event.method].view_function - cache_control = getattr(view_function, 'cache_control') - # Caching defeats the automatic reloading of application source code by - # `chalice local`, which is useful, so we disable caching in that case. - cache_control = 'no-store' if self.is_running_locally else cache_control - response.headers['Cache-Control'] = cache_control - return response - - def _http_cache_for(self, seconds: int): - """ - The HTTP Cache-Control response header value that will cause the - response to the current request to be cached for the given amount of - time. - """ - return f'public, max-age={seconds}, must-revalidate' - - HttpMethod = Literal['GET', 'POST', 'PUT', 'PATCH', 'HEAD', 'OPTIONS', 'DELETE'] - - def route[C: Callable](self, - path: str, - *, - methods: Sequence[HttpMethod] = ('GET',), - enabled: bool = True, - interactive: bool = True, - cache_control: str = 'no-store', - path_spec: JSON | None = None, - spec: JSON | None = None, - **kwargs - ) -> Callable[[C], C]: - """ - Decorates a view handler function in a Chalice application. - - See https://chalice.readthedocs.io/en/latest/api.html#Chalice.route. - - :param path: See https://aws.github.io/chalice/api#Chalice.route - - :param methods: See https://aws.github.io/chalice/api#Chalice.route - - :param enabled: If False, do not route any requests to the decorated - view function. 
The application will behave as if the - view function wasn't decorated. - - :param interactive: If False, do not show the "Try it out" button in the - Swagger UI. - - :param cache_control: The value to set in the 'Cache-Control' response - header. - - :param path_spec: Corresponds to an OpenAPI Paths Object. See - - https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.3.md#paths-object - - If multiple `@app.route` invocations refer to the same - path (but with different HTTP methods), only specify - this argument for one of them, otherwise an - AssertionError will be raised. - - :param spec: Corresponds to an OpenAPI Operation Object. See - - https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.3.md#operation-object - - Even though this keyword argument has a default value, it - must be specified for every `@app.route` invocation. The - reason for the default is so that the signature of the - override is compatible with that of the overridden method, - a mypy requirement. - """ - require(spec is not None, "Argument 'spec' is required") - assert spec is not None - if enabled: - if not interactive: - require(bool(methods), 'Must list methods with interactive=False') - self.non_interactive_routes.update((path, method) for method in methods) - spec = deep_dict_merge(spec, self.default_specs()) - chalice_decorator = super().route(path, methods=methods, **kwargs) - - def decorator(view_func): - view_func.cache_control = cache_control - self._register_spec(path, methods, path_spec, spec) - return chalice_decorator(view_func) - - return decorator - else: - return lambda view_func: view_func - - def spec(self) -> JSON: - """ - Return the final OpenAPI spec, stripping out unused tags. - - Only call this method after all routes are registered. - """ - used_tags = set( - json_str(tag) - for path in json_dict(self._specs['paths']).values() - for method in json_dict(path).values() if isinstance(method, dict) - for tag in json_list(method.get('tags', [])) - ) - reject('servers' in self._specs, "The 'servers' entry is computed") - return { - **self._specs, - 'tags': [ - tag for tag in json_list(self._specs.get('tags', [])) - if json_dict(tag)['name'] in used_tags - ], - 'servers': [{'url': str(self.base_url.add(path='/'))}] - } - - @property - def self_url(self) -> mutable_furl: - """ - The URL of the current request, including the path, but without query - arguments. Callers can safely modify the returned `furl` instance. - """ - request = self.current_request - assert request is not None - path = request.context['path'] - return self.base_url.add(path=path) - - @property - def base_url(self) -> mutable_furl: - """ - Returns the base URL of this application. Callers can safely modify the - returned `furl` instance. The base URL may or may not have a path and - callers should always append to it. 
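The tag stripping done by `spec()` above keeps the published spec free of tags that no operation references. The idea in isolation, with a made-up spec literal::

    spec = {
        'tags': [{'name': 'Auxiliary'}, {'name': 'Unused'}],
        'paths': {
            '/version': {
                'get': {'tags': ['Auxiliary']}
            }
        }
    }
    # Collect the tags that at least one operation actually uses,
    # skipping path-level keys that aren't operation objects
    used_tags = {
        tag
        for path in spec['paths'].values()
        for operation in path.values() if isinstance(operation, dict)
        for tag in operation.get('tags', [])
    }
    # Drop the globally declared tags that went unused
    spec['tags'] = [tag for tag in spec['tags'] if tag['name'] in used_tags]
    assert spec['tags'] == [{'name': 'Auxiliary'}]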
- """ - if self.current_request is None: - # Invocation from outside the context of handling of a request, for - # example, when `chalice local` loads the app module or during an - # invocation via AWS StepFunctions - self_url = config.lambda_endpoint(self.unqualified_app_name) - elif isinstance(self.current_request, Request): - try: - scheme = self.current_request.headers['x-forwarded-proto'] - except KeyError: - # Invocation via `chalice local` or tests - from chalice.constants import ( - DEFAULT_HANDLER_NAME, - ) - lambda_context = self.lambda_context - assert lambda_context is not None - assert lambda_context.function_name == DEFAULT_HANDLER_NAME - scheme = 'http' - else: - # Invocation via API Gateway - pass - self_url = furl(scheme=scheme, netloc=self.current_request.headers['host']) - else: - assert False, self.current_request - return self_url - - @property - def is_running_locally(self) -> bool: - host = self.base_url.netloc.partition(':')[0] - return host in ('localhost', '127.0.0.1') - - def _register_spec(self, - path: str, - methods: Iterable[str], - path_spec: JSON | None, - spec: JSON): - """ - Add a route's specifications to the specification object. - """ - paths = json_dict(self._specs['paths']) - if path_spec is not None: - reject(path in paths, - 'Only specify path_spec once per route path') - paths[path] = copy_json(path_spec) - - for method in methods: - # OpenAPI requires HTTP method names be lower case - method = method.lower() - # This may override duplicate specs from path_specs - path_methods = json_dict(paths.setdefault(path, {})) - reject(method in path_methods, - "Only specify 'spec' once per route path and method") - path_methods[method] = copy_json(spec) - - class _LogJSONEncoder(json.JSONEncoder): - - def default(self, o: Any) -> Any: - if isinstance(o, MultiDict): - # Convert to dict and flatten the singleton values. - return { - k: v[0] if len(v) == 1 else v - for k, v in ((k, o.getlist(k)) for k in o.keys()) - } - elif isinstance(o, CaseInsensitiveMapping): - return dict(o) - else: - return super().default(o) - - def _authenticate(self) -> Authentication | None: - """ - Authenticate the current request, return None if it is unauthenticated, - or raise a ChaliceViewError if it carries invalid authentication. 
- """ - return None - - def __authenticate(self): - auth = self._authenticate() - attribute_name = 'authentication' - assert attribute_name in AzulRequest.__annotations__ - setattr(self.current_request, attribute_name, auth) - if auth is None: - log.info('Did not authenticate request.') - else: - log.info('Authenticated request as %r', auth) - - def _log_request(self, request: Request) -> None: - info = { - 'query': request.query_params, - 'headers': request.headers - } - info = json.dumps(info, cls=self._LogJSONEncoder) - log.info('Received %s request for %r, with %s.', - request.context['httpMethod'], request.context['path'], info) - log.info(http_body_log_message('request', request.json_body)) - - def _log_response(self, response: Response) -> None: - info = { - 'headers': response.headers - } - info = json.dumps(info, cls=self._LogJSONEncoder) - log.info('Returning %i response with headers %s.', - response.status_code, info) - log.info(http_body_log_message('response', response.body)) - - absent = object() - - def _register_handler(self, - handler_type, - name, - user_handler, - wrapped_handler, - kwargs, - options=None): - super()._register_handler(handler_type, name, user_handler, - wrapped_handler, kwargs, options) - # Our handlers reference the name of the corresponding Lambda function - # which allows the handler to be the single source of truth when - # configuring Terraform, etc. We store other parameters used to - # configure the handler for the same reason. - for attribute, new_value, is_additive in [ - ('name', name, False), - ('queue', kwargs.get('queue', self.absent), False), - ('path', kwargs.get('path', self.absent), True) - ]: - if new_value is not self.absent: - try: - old_value = getattr(wrapped_handler, attribute) - except AttributeError: - if is_additive: - new_value = [new_value] - setattr(wrapped_handler, attribute, new_value) - else: - if is_additive: - old_value.append(new_value) - else: - assert old_value == new_value - - def load_static_resource(self, *path: str) -> str: - for part in path: - if os.sep in part: - raise BadRequestError(part) - try: - return self.load_resource('static', *path) - except FileNotFoundError as e: - log.warning('Resource not found', exc_info=e) - raise NotFoundError(path) - - def load_resource(self, *path: str) -> str: - package_root = os.path.dirname(self.app_module_path) - with open_resource(*path, package_root=package_root) as f: - return f.read() - - @property - def catalog(self) -> str: - request = self.current_request - # A request is only present when this Lambda function is invoked by API - # Gateway (or a simulation like `make local`). Prominent examples of - # when the request is absent are `chalice package` or when the Lambda - # function is invoked via an event schedule. - if request is not None: - params = request.query_params - if params is not None: - try: - return params['catalog'] - except KeyError: - pass - return config.default_catalog - - def swagger_resource(self, file_name: str) -> Response: - body = self.load_static_resource('swagger', file_name) - path = pathlib.Path(file_name) - content_type = mimetypes.types_map[path.suffix] - return Response(status_code=200, - headers={'Content-Type': content_type}, - body=body) - - @attrs.frozen(kw_only=True) - class HandlerDecorator(metaclass=ABCMeta): - """ - A base class for decorators of handler functions. - """ - - #: The unqualified name of the app the handler is part of or None for an - #: unbound decorator. 
- app_name: str | None = attrs.field(default=None) - - #: The name of the handler, or None for the main handler, or for an - #: unbound decorator. - handler_name: str | None = attrs.field(default=None) - - def bind(self, app: Chalice, handler_name: str | None = None) -> Self: - app_name, _ = config.unqualified_resource_name(app.app_name) - return attrs.evolve(self, app_name=app_name, handler_name=handler_name) - - @property - def tf_function_resource_name(self) -> str: - assert self.app_name is not None, 'Unbound decorator' - if self.handler_name is None: - return self.app_name - else: - assert self.handler_name != '' - return f'{self.app_name}_{self.handler_name}' - - # noinspection PyPep8Naming - @attrs.frozen(kw_only=True) - class metric_alarm(HandlerDecorator): - """ - Use this decorator on a Chalice handler function to configure a metric - alarm for the corresponding Lambda function. This decorator cannot be - used to decorate view functions, i.e. functions also decorated with - ``@app.route``. - """ - #: The CloudWatch metric to configure the alarm for - metric: LambdaMetric - - #: The number of failed or throttled lambda invocations that, when - #: exceeded, will trigger the alarm. - threshold: int - - #: The interval (in seconds) at which the alarm threshold is evaluated, - #: ranging from 1 minute to 1 day. The default is 5 minutes. - period: int - - def __call__(self, f): - assert isinstance(f, chalice.app.EventSourceHandler), f - try: - metric_alarms = getattr(f, 'metric_alarms') - except AttributeError: - metric_alarms = [] - setattr(f, 'metric_alarms', metric_alarms) - metric_alarms.append(self) - return f - - @property - def tf_resource_name(self) -> str: - return f'{self.tf_function_resource_name}_{self.metric.name}' - - @property - def metric_alarms(self) -> Iterator[metric_alarm]: - for metric in LambdaMetric: - # The api_handler lambda functions (indexer & service) aren't - # included in the app_module's handler_map, so we account for those - # first. - for_errors = metric is LambdaMetric.errors - alarm = self.metric_alarm(metric=metric, - threshold=1 if for_errors else 0, - period=24 * 60 * 60 if for_errors else 5 * 60) - yield alarm.bind(self) - for handler_name, handler in self.handler_map.items(): - if isinstance(handler, chalice.app.EventSourceHandler): - try: - metric_alarms = getattr(handler, 'metric_alarms') - except AttributeError: - metric_alarms = ( - self.metric_alarm(metric=metric, - threshold=0, - period=5 * 60) - for metric in LambdaMetric - ) - for metric_alarm in metric_alarms: - yield metric_alarm.bind(self, handler_name) - - # noinspection PyPep8Naming - @attrs.frozen - class retry(HandlerDecorator): - """ - Use this decorator to specify the number of times a Lambda invocation of - the decorated event handler function should be retried. This decorator - cannot be used to decorate view functions, i.e. functions also decorated - with ``@app.route``. 
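The `metric_alarm` decorator above appends itself to a list on the decorated handler instead of replacing an earlier decoration, so several alarms can be stacked on one function. A toy version of that accumulation, with illustrative names::

    import attrs

    @attrs.frozen
    class alarm:
        threshold: int

        def __call__(self, f):
            # Reuse the list if a previous @alarm already created it
            alarms = getattr(f, 'alarms', [])
            setattr(f, 'alarms', alarms)
            alarms.append(self)
            return f

    @alarm(threshold=1)
    @alarm(threshold=0)
    def handler(event):
        pass

    # Decorators apply bottom up, so the inner one lands first
    assert [a.threshold for a in handler.alarms] == [0, 1]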
- - https://docs.aws.amazon.com/lambda/latest/dg/invocation-retries.html - """ - num_retries: int - - def __call__(self, f): - assert isinstance(f, chalice.app.EventSourceHandler), f - setattr(f, 'retry', self) - return f - - @property - def retries(self) -> Iterator[retry]: - for handler_name, handler in self.handler_map.items(): - if isinstance(handler, chalice.app.EventSourceHandler): - try: - retry = getattr(handler, 'retry') - except AttributeError: - pass - else: - yield retry.bind(self, handler_name) - - def default_routes(self): - - @self.route( - '/', - interactive=False, - spec={ - 'summary': 'Redirect to the Swagger UI for interactive use of this REST API', - 'tags': ['Auxiliary'], - 'responses': { - '301': { - 'description': 'A redirect to the Swagger UI' - } - } - } - ) - def swagger_redirect(): - headers: HeadersType = { - 'Location': str(self.base_url.set(path='swagger/index.html')) - } - return Response(status_code=301, body='', headers=headers) - - @self.route( - '/swagger/index.html', - interactive=False, - cache_control=self._http_cache_for(24 * 60 * 60), - cors=False, - spec={ - 'summary': 'The Swagger UI for interactive use of this REST API', - 'tags': ['Auxiliary'], - 'responses': { - '200': { - 'description': 'The response body is an HTML page containing the Swagger UI' - } - } - } - ) - def swagger_ui(): - return self.swagger_resource('index.html') - - @self.route( - '/swagger/swagger-initializer.js', - interactive=False, - cache_control=self._http_cache_for(60), - cors=True, - spec={ - 'summary': 'Used internally by the Swagger UI', - 'tags': ['Auxiliary'], - 'responses': { - '200': { - 'description': 'The response body is JavaScript used internally by the Swagger UI' - } - } - } - ) - def swagger_initializer(): - file_name = 'swagger-initializer.js.template.mustache' - template = self.load_static_resource('swagger', file_name) - base_url = self.base_url - redirect_url = furl(base_url).add(path='oauth2_redirect') - openapi_spec = furl(base_url).add(path='openapi.json') - body = chevron.render(template, { - 'OPENAPI_SPEC': json.dumps(str(openapi_spec.path)), - 'OAUTH2_CLIENT_ID': json.dumps(config.google_oauth2_client_id), - 'OAUTH2_REDIRECT_URL': json.dumps(str(redirect_url)), - 'NON_INTERACTIVE_METHODS': json.dumps([ - f'{path}/{method.lower()}' - for path, method in self.non_interactive_routes - ]) - }) - headers: HeadersType = {'Content-Type': 'application/javascript'} - return Response(status_code=200, body=body, headers=headers) - - @self.route( - '/swagger/{file}', - interactive=False, - cache_control=self._http_cache_for(24 * 60 * 60), - cors=True, - spec={ - 'summary': 'Static files needed for the Swagger UI', - 'tags': ['Auxiliary'], - 'responses': { - '200': { - 'description': 'The response body is the contents of the requested file' - }, - '404': { - 'description': 'The requested file does not exist' - } - } - }, - path_spec={ - 'parameters': [ - params.path('file', str, description='The name of a static file to be returned') - ] - } - ) - def swagger_resource(file): - return self.swagger_resource(file) - - @self.route( - '/openapi.json', - methods=['GET'], - cache_control=self._http_cache_for(60), - cors=True, - spec={ - 'summary': 'Return OpenAPI specifications for this REST API', - 'description': format_description(''' - This endpoint returns the [OpenAPI specifications]' - (https://github.com/OAI/OpenAPI-Specification) for this REST - API. These are the specifications used to generate the page - you are visiting now. 
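`swagger_initializer` above pushes every dynamic value through `json.dumps` before substitution, so each lands in the generated JavaScript as a correctly quoted and escaped literal. A minimal sketch, assuming `chevron` and a made-up template; the triple mustaches suppress mustache's HTML escaping, which would otherwise mangle the quotes::

    import json

    import chevron

    template = 'const clientId = {{{CLIENT_ID}}};'
    rendered = chevron.render(template, {
        'CLIENT_ID': json.dumps('1234-abcd.example')
    })
    assert rendered == 'const clientId = "1234-abcd.example";'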
- '''), - 'responses': { - '200': { - 'description': '200 response', - **responses.json_content( - schema.object( - openapi=str, - **{ - k: schema.object() - for k in ('info', 'tags', 'servers', 'paths', 'components') - } - ) - ) - } - }, - 'tags': ['Auxiliary'] - } - ) - def openapi(): - return Response(status_code=200, - headers={'content-type': 'application/json'}, - body=self.spec()) - - @self.route( - '/version', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Describe current version of this REST API', - 'tags': ['Auxiliary'], - 'responses': { - '200': { - 'description': 'Version endpoint is reachable.', - **responses.json_content( - schema.object( - git=schema.object( - commit=str, - dirty=bool - ) - ) - ) - } - } - } - ) - def version(): - return { - 'git': config.lambda_git_status - } - - @self.route( - '/robots.txt', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Robots Exclusion Protocol', - 'tags': ['Auxiliary'], - 'responses': { - '200': { - 'description': format_description(''' - The robots.txt resource according to - [RFC9309](https://datatracker.ietf.org/doc/html/rfc9309) - '''), - } - } - } - ) - def robots_txt(): - body = '\n'.join(f'{k}: {v}' for k, v in [ - ('User-agent', '*'), - ('Disallow', '/'), - # Keep consistent with regex in scope-down statement for the - # bot control rule set in api_gateway.tf.json.template.py - ('Allow', '/$'), - ('Allow', '/swagger/') - ]) - headers: HeadersType = {'Content-Type': 'text/plain'} - return Response(status_code=200, headers=headers, body=body) - - return locals() - - def default_specs(self): - return { - 'responses': { - '504': { - 'description': format_description(''' - Request timed out. When handling this response, clients - should wait the number of seconds specified in the - `Retry-After` header and then retry the request. 
- ''') - } - } - } - - -@attrs.frozen(kw_only=True) -class AppController: - app: AzulChaliceApp - - @property - def lambda_context(self) -> LambdaContext: - assert self.app.lambda_context is not None - return self.app.lambda_context - - @property - def current_request(self) -> AzulRequest: - assert self.app.current_request is not None - return self.app.current_request diff --git a/src/azul/compliance/__init__.py b/src/azul/compliance/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/azul/compliance/fedramp_inventory_service.py b/src/azul/compliance/fedramp_inventory_service.py deleted file mode 100644 index 69ab18103b..0000000000 --- a/src/azul/compliance/fedramp_inventory_service.py +++ /dev/null @@ -1,932 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections import ( - Counter, - defaultdict, -) -import inspect -import json -import logging -from operator import ( - attrgetter, -) -import pathlib -import sys -from typing import ( - AbstractSet, - Iterable, - Iterator, - Optional, - Self, - Sequence, -) - -import attr -import attrs -from furl import ( - furl, -) -import gitlab.v4.objects.projects -from more_itertools import ( - chunked, - flatten, -) -import openpyxl -from openpyxl.utils import ( - get_column_letter, -) -from openpyxl.worksheet.worksheet import ( - Worksheet, -) - -from azul import ( - cached_property, - config, -) -from azul.deployment import ( - aws, -) -from azul.types import ( - JSON, - JSONs, -) - -log = logging.getLogger(__name__) - - -@attr.s(auto_attribs=True, frozen=True, kw_only=True) -class ResourceConfig: - id: str - name: Optional[str] - region: str - type: str - state_id: str - config: JSON - supplementary_config: JSON - - @classmethod - def from_response(cls, response: dict) -> Self: - return cls( - id=response['resourceId'], - name=response.get('resourceName'), - region=response['awsRegion'], - type=response['resourceType'], - state_id=response['configurationStateId'], - config=json.loads(response['configuration']), - supplementary_config=response['supplementaryConfiguration'] - ) - - -null_str = Optional[str] - - -class YesNo: - yes = 'Yes' - no = 'No' - - @classmethod - def from_bool(cls, b: bool) -> str: - return cls.yes if b else cls.no - - -@attr.s(auto_attribs=True, frozen=True, kw_only=True) -class InventoryRow: - unique_id: null_str = attr.ib(default=None) - ip_address: null_str = attr.ib(default=None) - is_virtual: null_str = attr.ib(default=None) - is_public: null_str = attr.ib(default=None) - dns_name: null_str = attr.ib(default=None) - netbios_name: null_str = attr.ib(default=None) - mac_address: null_str = attr.ib(default=None) - authenticated_scan_planned: null_str = attr.ib(default=None) - baseline_config: null_str = attr.ib(default=None) - os: null_str = attr.ib(default=None) - location: null_str = attr.ib(default=None) - asset_type: null_str = attr.ib(default=None) - hardware_model: null_str = attr.ib(default=None) - in_latest_scan: null_str = attr.ib(default=None) - software_vendor: null_str = attr.ib(default=None) - software_product_name: null_str = attr.ib(default=None) - patch_level: null_str = attr.ib(default=None) - purpose: null_str = attr.ib(default=None) - comments: null_str = attr.ib(default=None) - asset_tag: null_str = attr.ib(default=None) - network_id: null_str = attr.ib(default=None) - system_owner: null_str = attr.ib(default=None) - application_owner: null_str = attr.ib(default=None) - - -class Mapper(metaclass=ABCMeta): - - @abstractmethod - def map(self, resource: 
ResourceConfig) -> Iterable[InventoryRow]: - raise NotImplementedError - - def _common_fields(self, - resource: ResourceConfig, - *, - id_suffix: Optional[str] = None - ) -> dict: - return dict( - asset_tag=resource.name, - location=resource.region, - software_vendor='AWS', - system_owner=config.owner, - application_owner=config.owner, - unique_id=resource.id + ('' if id_suffix is None else f'/{id_suffix}') - ) - - def _supported_resource_types(self) -> AbstractSet[str]: - return frozenset() - - def can_map(self, resource: ResourceConfig) -> bool: - return resource.type in self._supported_resource_types() - - -class LambdaMapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return {'AWS::Lambda::Function'} - - def map(self, resource: ResourceConfig) -> Iterator[InventoryRow]: - yield InventoryRow( - asset_type='AWS Lambda Function', - baseline_config=resource.config['runtime'], - is_public=YesNo.no, - is_virtual=YesNo.yes, - purpose=resource.config.get('description'), - software_product_name='AWS Lambda', - **self._common_fields(resource) - ) - - -class ElasticSearchMapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return {'AWS::Elasticsearch::Domain'} - - def map(self, resource: ResourceConfig) -> Iterator[InventoryRow]: - yield InventoryRow( - asset_type='AWS OpenSearch Domain', - baseline_config=resource.config['elasticsearchVersion'], - is_public=YesNo.no, - is_virtual=YesNo.yes, - network_id=resource.config['endpoints'].get('vpc'), - patch_level=resource.config.get('serviceSoftwareOptions', {}).get('currentVersion'), - software_product_name='AWS OpenSearch', - **self._common_fields(resource) - ) - - -class EC2Mapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return {'AWS::EC2::Instance'} - - def map(self, resource: ResourceConfig) -> Iterable[InventoryRow]: - for nic in resource.config['networkInterfaces']: - for ip_addresses in nic['privateIpAddresses']: - ip_addresses: JSON - association = ip_addresses.get('association') - ips = [ - dict(ip_address=ip_addresses['privateIpAddress'], - dns_name=ip_addresses['privateDnsName'], - is_public=YesNo.no), - *(() if association is None else ( - dict(ip_address=association['publicIp'], - is_public=YesNo.yes, - dns_name=resource.config.get('publicDnsName')) - )) - ] - for ip_fields in ips: - yield InventoryRow( - asset_type='AWS EC2 Instance', - authenticated_scan_planned=YesNo.yes, - baseline_config=resource.config['imageId'], - hardware_model=resource.config['instanceType'], - is_virtual=YesNo.yes, - mac_address=nic['macAddress'], - network_id=ip_addresses.get('subnetId'), - **ip_fields, - **self._common_fields(resource, id_suffix=ip_fields['ip_address']) - ) - - def _get_ip_address(self, ip_addresses: JSON, keys) -> str: - for key in keys: - ip_addresses = ip_addresses[key] - return ip_addresses - - -class ELBMapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return { - 'AWS::ElasticLoadBalancing::LoadBalancer', - 'AWS::ElasticLoadBalancingV2::LoadBalancer' - } - - def map(self, resource: ResourceConfig) -> Iterator[InventoryRow]: - ip_addresses = self._get_ip_addresses(resource.config['availabilityZones']) - if not ip_addresses: - ip_addresses = [None] - for ip_address in ip_addresses: - yield InventoryRow( - dns_name=resource.config['dNSName'], - ip_address=ip_address, - is_public=YesNo.from_bool(resource.config['scheme'] == 'internet-facing'), - is_virtual=YesNo.yes, - **self._polymorphic_fields(resource), - **self._common_fields(resource, 
id_suffix=ip_address) - ) - - def _polymorphic_fields(self, resource: ResourceConfig) -> dict[str, str]: - # Classic ELBs have key of 'vpcid' while V2 ELBs have key of 'vpcId' - prefix = 'AWS Elastic Load Balancer-' - if resource.type == 'AWS::ElasticLoadBalancing::LoadBalancer': - asset_type = prefix + 'Classic' - network_id = resource.config['vpcid'] - else: - asset_type = prefix + resource.config['type'] - network_id = resource.config['vpcId'] - return dict(asset_type=asset_type, network_id=network_id) - - def _get_ip_addresses(self, availability_zones: JSONs) -> set[Optional[str]]: - return { - load_balancer_address.get('ipAddress') - for availability_zone in availability_zones - for load_balancer_addresses in availability_zone.get('loadBalancerAddresses', ()) - for load_balancer_address in load_balancer_addresses - } - - -class NetworkInterfaceMapper(Mapper): - - def _supported_resource_types(self) -> AbstractSet[str]: - return {'AWS::EC2::NetworkInterface'} - - def map(self, resource: ResourceConfig) -> Iterable[InventoryRow]: - ips = [ - dict(is_public=YesNo.no, - ip_address=private_ip['privateIpAddress'], - dns_name=private_ip.get('privateDnsName')) - for private_ip in resource.config['privateIpAddresses'] - ] - association = resource.config.get('association') - if association is not None: - ips.append(dict(is_public=YesNo.yes, - ip_address=association['publicIp'], - dns_name=association['publicDnsName'])) - for ip_fields in ips: - yield InventoryRow( - asset_type='AWS EC2 Network Interface', - mac_address=resource.config.get('macAddress'), - network_id=resource.config['subnetId'], - purpose=resource.config.get('description'), - **ip_fields, - **self._common_fields(resource, id_suffix=ip_fields['ip_address']) - ) - - -class S3Mapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return {'AWS::S3::Bucket'} - - def map(self, resource: ResourceConfig) -> Iterator[InventoryRow]: - yield InventoryRow( - asset_type='AWS S3 Bucket', - comments=self._get_encryption_status(resource), - is_public=YesNo.from_bool(self._get_is_public(resource)), - is_virtual=YesNo.yes, - **self._common_fields(resource) - ) - - def _get_is_public(self, resource: ResourceConfig) -> bool: - try: - public_access_config = resource.supplementary_config['PublicAccessBlockConfiguration'] - except KeyError: - # If there is no PublicAccessBlockConfiguration then this bucket is public - return True - else: - public_access_config = json.loads(public_access_config) - # The bucket is public if any access blocks are false - return not all(public_access_config.values()) - - def _get_encryption_status(self, resource: ResourceConfig) -> str: - if 'ServerSideEncryptionConfiguration' in resource.supplementary_config: - return 'Encrypted' - else: - return 'Not encrypted' - - -class DynamoDbTableMapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return {'AWS::DynamoDB::Table'} - - def map(self, resource: ResourceConfig) -> Iterator[InventoryRow]: - yield InventoryRow( - asset_type='AWS DynamoDB Table', - is_public=YesNo.no, - is_virtual=YesNo.yes, - software_product_name='AWS DynamoDB', - **self._common_fields(resource) - ) - - -class ElasticIPMapper(Mapper): - - def _supported_resource_types(self) -> AbstractSet[str]: - return {'AWS::EC2::EIP'} - - def map(self, resource: ResourceConfig) -> Iterable[InventoryRow]: - for ip, is_public in [ - (resource.config['publicIp'], YesNo.yes), - (resource.config['privateIpAddress'], YesNo.no) - ]: - yield InventoryRow( - asset_type='AWS EC2 Elastic IP', - 
ip_address=ip, - is_public=is_public, - network_id=resource.config['networkInterfaceId'], - **self._common_fields(resource, id_suffix=ip) - ) - - -class RDSMapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return {'AWS::RDS::DBInstance'} - - def map(self, resource: ResourceConfig) -> Iterator[InventoryRow]: - yield InventoryRow( - asset_type='AWS RDS Instance', - hardware_model=resource.config['dBInstanceClass'], - is_public=YesNo.from_bool(resource.config['publiclyAccessible']), - is_virtual=YesNo.yes, - network_id=resource.config.get('dBSubnetGroup', {}).get('vpcId'), - software_product_name=f"{resource.config['engine']}-{resource.config['engineVersion']}", - **self._common_fields(resource) - ) - - -class VPCMapper(Mapper): - - def _supported_resource_types(self) -> set[str]: - return {'AWS::EC2::VPC'} - - def map(self, resource: ResourceConfig) -> Iterator[InventoryRow]: - yield InventoryRow( - asset_type='AWS VPC', - baseline_config=resource.state_id, - ip_address=resource.config['cidrBlock'], - is_public=YesNo.yes, - is_virtual=YesNo.yes, - network_id=resource.config['vpcId'], - **self._common_fields(resource) - ) - - -class ACMCertificateMapper(Mapper): - - def _supported_resource_types(self) -> AbstractSet[str]: - return {'AWS::ACM::Certificate'} - - def map(self, resource: ResourceConfig) -> Iterable[InventoryRow]: - yield InventoryRow( - asset_type='AWS ACM Certificate', - **self._common_fields(resource) - ) - for user in resource.config['inUseBy']: - parts, id = user.split('/', 1) - parts = parts.split(':') - if parts[:2] == ['aws', 'clientvpn']: - _, resource_type, region, stage = parts - url = '.'.join([id, stage, resource_type, region, 'amazonaws.com']) - yield InventoryRow( - asset_tag=user, - asset_type='AWS Client VPN', - dns_name=url, - location=region, - software_vendor='AWS', - unique_id=url + ':443', - ) - - -class ResourceComplianceMapper(Mapper): - - def _supported_resource_types(self) -> AbstractSet[str]: - return {'AWS::Config::ResourceCompliance'} - - def map(self, resource: ResourceConfig) -> Iterable[InventoryRow]: - # Intentionally omit rows for this resource type - return () - - -class DefaultMapper(Mapper): - - def can_map(self, resource: ResourceConfig) -> bool: - return True - - def map(self, resource: ResourceConfig) -> Iterable[InventoryRow]: - yield InventoryRow( - asset_type=resource.type, - **self._common_fields(resource) - ) - - -class FedRAMPInventoryService: - default_column_width = 10 - first_writable_row = 6 - report_worksheet_name = 'Inventory' - - @property - def config(self): - return aws.client('config') - - @cached_property - def _mappers(self) -> Sequence[Mapper]: - current_module = sys.modules[__name__] - - def is_mapper_cls(o: object) -> bool: - return ( - inspect.isclass(o) - and not inspect.isabstract(o) - and issubclass(o, Mapper) - ) - - mapper_clss = [ - mapper_cls - for name, mapper_cls in inspect.getmembers(current_module, is_mapper_cls) - ] - - def get_linenno(o: type) -> int: - src, lineno = inspect.findsource(o) - return lineno - - mapper_clss.sort(key=get_linenno) - return [mapper_cls() for mapper_cls in mapper_clss] - - def resource_ids_by_type(self) -> defaultdict[str, list[str]]: - resource_ids_by_type = defaultdict(list) - for resource_type in self._all_aws_resource_types: - args = dict(resourceType=resource_type) - while True: - response = self.config.list_discovered_resources(**args) - resources = response['resourceIdentifiers'] - log.debug('Discovered %d resources of type %s', len(resources), 
resource_type) - for resource in resources: - assert resource['resourceType'] == resource_type - resource_ids_by_type[resource_type].append(resource['resourceId']) - next_token = response.get('nextToken') - if next_token is None: - break - else: - args['nextToken'] = next_token - return resource_ids_by_type - - def get_resources(self) -> Iterator[ResourceConfig]: - for resource_type, resource_ids in self.resource_ids_by_type().items(): - # Maximum permitted batch size - for resource_ids in chunked(resource_ids, 100): - resource_keys = [ - dict(resourceType=resource_type, resourceId=resource_id) - for resource_id in resource_ids - ] - while resource_keys: - response = self.config.batch_get_resource_config(resourceKeys=resource_keys) - items = response['baseConfigurationItems'] - log.debug('Got page of %d resources of type %s', len(items), resource_type) - yield from map(ResourceConfig.from_response, items) - resource_keys = response['unprocessedResourceKeys'] - - def get_inventory(self, - resources: Iterable[ResourceConfig] - ) -> Iterable[InventoryRow]: - rows_by_mapper: defaultdict[Mapper, list[InventoryRow]] = defaultdict(list) - resource_counts = Counter() - row_counts = Counter() - for resource in resources: - mapper = self._get_mapper(resource) - log.debug('Mapping %r resource using %r', - resource.type, type(mapper).__name__) - rows = sorted(mapper.map(resource), key=attrgetter('unique_id')) - log.debug('Mapped to %d rows', len(rows)) - resource_counts[resource.type] += 1 - row_counts[resource.type] += len(rows) - rows_by_mapper[mapper].extend(rows) - - log.info('Inventory contents:') - print(f'\n{"Resource type":<42s}' - f'{"# resources":<20s}' - f'{"# rows":<20s}\n') - for resource_type in resource_counts.keys(): - print(f'{resource_type:<42s}' - f'{resource_counts[resource_type]:>15d}' - f'{row_counts[resource_type]:>10d}') - - return flatten(rows_by_mapper[mapper] for mapper in self._mappers) - - def get_synthetic_inventory(self) -> Iterable[InventoryRow]: - data_browser_url = furl(scheme='https', netloc=config.data_browser_domain) - yield InventoryRow( - asset_type='Application endpoint', - dns_name=str(data_browser_url), - is_public=YesNo.yes, - purpose='UI for external users', - software_vendor='UCSC', - system_owner=config.owner, - application_owner=config.owner, - unique_id='Data Browser UI', - ) - yield InventoryRow( - asset_type='Service endpoint', - dns_name=str(config.service_endpoint), - is_public=YesNo.from_bool(not config.private_api), - purpose='Service API (backend for Data Browser UI, programmatic use by external users)', - software_vendor='UCSC', - system_owner=config.owner, - application_owner=config.owner, - unique_id='Service REST API', - ) - yield InventoryRow( - asset_type='Application endpoint', - dns_name=str(config.indexer_endpoint), - is_public=YesNo.from_bool(not config.private_api), - purpose='Indexer API (primarily for internal users)', - software_vendor='UCSC', - system_owner=config.owner, - application_owner=config.owner, - unique_id='Indexer API', - ) - - for unique_id, purpose, port, scheme in [ - ('GitLab UI', 'CI/CD (internal users only)', None, 'https'), - ('GitLab SSH', 'CI/CD (system administrators only)', 2222, 'ssh'), - ('GitLab Git', 'Source repository for CI/CD (internal users only)', 22, 'git+ssh') - ]: - gitlab_url = furl(scheme=scheme, - host=f'gitlab.{config.domain_name}', - port=port) - yield InventoryRow( - asset_type='Service endpoint', - dns_name=str(gitlab_url), - is_public=YesNo.no, - software_vendor='GitLab', - 
system_owner=config.owner, - application_owner=config.owner, - purpose=purpose, - unique_id=unique_id, - ) - - def write_report(self, - inventory: Iterable[InventoryRow], - template_path: pathlib.Path, - output_path: pathlib.Path - ) -> None: - workbook = openpyxl.load_workbook(template_path) - worksheet = workbook[self.report_worksheet_name] - for row_number, row in enumerate(inventory, start=self.first_writable_row): - row = attr.astuple(row) - for column_number, value in enumerate(row, start=1): - self._write_cell_if_value_provided(worksheet, - column=column_number, - row=row_number, - value=value) - workbook.save(output_path) - log.info('Wrote report to %s', output_path) - - def update_wiki(self, - project: gitlab.v4.objects.projects.Project, - page_name: str, - resources: Iterable[ResourceConfig], - ) -> None: - content = self._wiki_content(resources) - try: - page = project.wikis.get(page_name) - except gitlab.exceptions.GitlabError as e: - if e.response_code == 404: - log.info('Wiki page %r not found', page_name) - project.wikis.create({ - 'title': page_name, - 'content': content - }) - log.info('Created wiki page %r (character count: %d)', - page_name, len(content)) - else: - raise - else: - old_length = len(page.content) - page.content = content - page.save() - log.info('Updated wiki page %r (character count: %d -> %d)', - page_name, old_length, len(content)) - - def _get_mapper(self, resource: ResourceConfig) -> Mapper: - return next( - mapper - for mapper in self._mappers - if mapper.can_map(resource) - ) - - def _write_cell_if_value_provided(self, - worksheet: Worksheet, - column: int, - row: int, - value: Optional[str] - ) -> None: - if value: - # Scale the size of the column with the input value if necessary. - # By default, width is None. 
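A sketch of the column sizing that `_write_cell_if_value_provided` performs, assuming `openpyxl` and a fresh workbook; a column's width reads as None until one is assigned::

    import openpyxl
    from openpyxl.utils import get_column_letter

    wb = openpyxl.Workbook()
    ws = wb.active
    value = 'a rather long cell value'
    # Grow the column to fit the widest value written so far, starting
    # from an assumed default width of 10
    dim = ws.column_dimensions[get_column_letter(1)]
    dim.width = max(dim.width or 10, len(value))
    ws.cell(column=1, row=1, value=value)
    assert dim.width == len(value)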
- dimensions = worksheet.column_dimensions[get_column_letter(column)] - if dimensions.width is None: - dimensions.width = self.default_column_width - else: - dimensions.width = max(dimensions.width, len(value)) - worksheet.cell(column=column, row=row, value=value) - - def _wiki_content(self, resources: Iterable[ResourceConfig]) -> str: - return '\n\n'.join( - f'```\n{json.dumps(attrs.asdict(resource), indent=4)}\n```' - for resource in resources - ) - - # https://docs.aws.amazon.com/config/latest/APIReference/API_ListDiscoveredResources.html#API_ListDiscoveredResources_RequestSyntax - _all_aws_resource_types = [ - 'AWS::ACM::Certificate', - 'AWS::AccessAnalyzer::Analyzer', - 'AWS::AmazonMQ::Broker', - 'AWS::ApiGateway::RestApi', - 'AWS::ApiGateway::Stage', - 'AWS::ApiGatewayV2::Api', - 'AWS::ApiGatewayV2::Stage', - 'AWS::AppConfig::Application', - 'AWS::AppConfig::ConfigurationProfile', - 'AWS::AppConfig::Environment', - 'AWS::AppStream::DirectoryConfig', - 'AWS::AppSync::GraphQLApi', - 'AWS::Athena::DataCatalog', - 'AWS::Athena::WorkGroup', - 'AWS::AutoScaling::AutoScalingGroup', - 'AWS::AutoScaling::LaunchConfiguration', - 'AWS::AutoScaling::ScalingPolicy', - 'AWS::AutoScaling::ScheduledAction', - 'AWS::AutoScaling::WarmPool', - 'AWS::Backup::BackupPlan', - 'AWS::Backup::BackupSelection', - 'AWS::Backup::BackupVault', - 'AWS::Backup::RecoveryPoint', - 'AWS::Backup::ReportPlan', - 'AWS::Batch::ComputeEnvironment', - 'AWS::Batch::JobQueue', - 'AWS::Budgets::BudgetsAction', - 'AWS::Cloud9::EnvironmentEC2', - 'AWS::CloudFormation::Stack', - 'AWS::CloudFront::Distribution', - 'AWS::CloudFront::StreamingDistribution', - 'AWS::CloudTrail::Trail', - 'AWS::CloudWatch::Alarm', - 'AWS::CodeBuild::Project', - 'AWS::CodeDeploy::Application', - 'AWS::CodeDeploy::DeploymentConfig', - 'AWS::CodeDeploy::DeploymentGroup', - 'AWS::CodeGuruReviewer::RepositoryAssociation', - 'AWS::CodePipeline::Pipeline', - 'AWS::Config::ConformancePackCompliance', - 'AWS::Config::ResourceCompliance', - 'AWS::Connect::PhoneNumber', - 'AWS::CustomerProfiles::Domain', - 'AWS::DMS::Certificate', - 'AWS::DMS::EventSubscription', - 'AWS::DMS::ReplicationSubnetGroup', - 'AWS::DataSync::LocationEFS', - 'AWS::DataSync::LocationFSxLustre', - 'AWS::DataSync::LocationFSxWindows', - 'AWS::DataSync::LocationHDFS', - 'AWS::DataSync::LocationNFS', - 'AWS::DataSync::LocationObjectStorage', - 'AWS::DataSync::LocationS3', - 'AWS::DataSync::LocationSMB', - 'AWS::DataSync::Task', - 'AWS::Detective::Graph', - 'AWS::DeviceFarm::TestGridProject', - 'AWS::DynamoDB::Table', - 'AWS::EC2::CustomerGateway', - 'AWS::EC2::DHCPOptions', - 'AWS::EC2::EIP', - 'AWS::EC2::EgressOnlyInternetGateway', - 'AWS::EC2::FlowLog', - 'AWS::EC2::Host', - 'AWS::EC2::IPAM', - 'AWS::EC2::Instance', - 'AWS::EC2::InternetGateway', - 'AWS::EC2::LaunchTemplate', - 'AWS::EC2::NatGateway', - 'AWS::EC2::NetworkAcl', - 'AWS::EC2::NetworkInsightsAccessScopeAnalysis', - 'AWS::EC2::NetworkInsightsPath', - 'AWS::EC2::NetworkInterface', - 'AWS::EC2::RegisteredHAInstance', - 'AWS::EC2::RouteTable', - 'AWS::EC2::SecurityGroup', - 'AWS::EC2::Subnet', - 'AWS::EC2::TrafficMirrorFilter', - 'AWS::EC2::TrafficMirrorSession', - 'AWS::EC2::TrafficMirrorTarget', - 'AWS::EC2::TransitGateway', - 'AWS::EC2::TransitGatewayAttachment', - 'AWS::EC2::TransitGatewayRouteTable', - 'AWS::EC2::VPC', - 'AWS::EC2::VPCEndpoint', - 'AWS::EC2::VPCEndpointService', - 'AWS::EC2::VPCPeeringConnection', - 'AWS::EC2::VPNConnection', - 'AWS::EC2::VPNGateway', - 'AWS::EC2::Volume', - 'AWS::ECR::PublicRepository', - 
'AWS::ECR::RegistryPolicy', - 'AWS::ECR::Repository', - 'AWS::ECS::Cluster', - 'AWS::ECS::Service', - 'AWS::ECS::TaskDefinition', - 'AWS::EFS::AccessPoint', - 'AWS::EFS::FileSystem', - 'AWS::EKS::Addon', - 'AWS::EKS::Cluster', - 'AWS::EKS::FargateProfile', - 'AWS::EKS::IdentityProviderConfig', - 'AWS::EMR::SecurityConfiguration', - 'AWS::ElasticBeanstalk::Application', - 'AWS::ElasticBeanstalk::ApplicationVersion', - 'AWS::ElasticBeanstalk::Environment', - 'AWS::ElasticLoadBalancing::LoadBalancer', - 'AWS::ElasticLoadBalancingV2::Listener', - 'AWS::ElasticLoadBalancingV2::LoadBalancer', - 'AWS::Elasticsearch::Domain', - 'AWS::EventSchemas::Discoverer', - 'AWS::EventSchemas::Registry', - 'AWS::EventSchemas::RegistryPolicy', - 'AWS::EventSchemas::Schema', - 'AWS::Events::ApiDestination', - 'AWS::Events::Archive', - 'AWS::Events::Connection', - 'AWS::Events::Endpoint', - 'AWS::Events::EventBus', - 'AWS::Events::Rule', - 'AWS::FIS::ExperimentTemplate', - 'AWS::FraudDetector::EntityType', - 'AWS::FraudDetector::Label', - 'AWS::FraudDetector::Outcome', - 'AWS::FraudDetector::Variable', - 'AWS::GlobalAccelerator::Accelerator', - 'AWS::GlobalAccelerator::EndpointGroup', - 'AWS::GlobalAccelerator::Listener', - 'AWS::Glue::Classifier', - 'AWS::Glue::Job', - 'AWS::Glue::MLTransform', - 'AWS::GuardDuty::Detector', - 'AWS::GuardDuty::Filter', - 'AWS::GuardDuty::IPSet', - 'AWS::GuardDuty::ThreatIntelSet', - 'AWS::HealthLake::FHIRDatastore', - 'AWS::IAM::Group', - 'AWS::IAM::Policy', - 'AWS::IAM::Role', - 'AWS::IAM::User', - 'AWS::IVS::Channel', - 'AWS::IVS::PlaybackKeyPair', - 'AWS::IVS::RecordingConfiguration', - 'AWS::ImageBuilder::ContainerRecipe', - 'AWS::ImageBuilder::DistributionConfiguration', - 'AWS::ImageBuilder::InfrastructureConfiguration', - 'AWS::IoT::AccountAuditConfiguration', - 'AWS::IoT::Authorizer', - 'AWS::IoT::CustomMetric', - 'AWS::IoT::Dimension', - 'AWS::IoT::MitigationAction', - 'AWS::IoT::Policy', - 'AWS::IoT::RoleAlias', - 'AWS::IoT::ScheduledAudit', - 'AWS::IoT::SecurityProfile', - 'AWS::IoTAnalytics::Channel', - 'AWS::IoTAnalytics::Dataset', - 'AWS::IoTAnalytics::Datastore', - 'AWS::IoTAnalytics::Pipeline', - 'AWS::IoTEvents::AlarmModel', - 'AWS::IoTEvents::DetectorModel', - 'AWS::IoTEvents::Input', - 'AWS::IoTSiteWise::AssetModel', - 'AWS::IoTSiteWise::Dashboard', - 'AWS::IoTSiteWise::Gateway', - 'AWS::IoTSiteWise::Portal', - 'AWS::IoTSiteWise::Project', - 'AWS::IoTTwinMaker::Entity', - 'AWS::IoTTwinMaker::Scene', - 'AWS::IoTTwinMaker::Workspace', - 'AWS::KMS::Key', - 'AWS::Kinesis::Stream', - 'AWS::Kinesis::StreamConsumer', - 'AWS::KinesisAnalyticsV2::Application', - 'AWS::KinesisVideo::SignalingChannel', - 'AWS::Lambda::Function', - 'AWS::Lex::Bot', - 'AWS::Lex::BotAlias', - 'AWS::Lightsail::Bucket', - 'AWS::Lightsail::Certificate', - 'AWS::Lightsail::Disk', - 'AWS::Lightsail::StaticIp', - 'AWS::LookoutMetrics::Alert', - 'AWS::LookoutVision::Project', - 'AWS::MSK::Cluster', - 'AWS::MediaPackage::PackagingConfiguration', - 'AWS::MediaPackage::PackagingGroup', - 'AWS::NetworkFirewall::Firewall', - 'AWS::NetworkFirewall::FirewallPolicy', - 'AWS::NetworkFirewall::RuleGroup', - 'AWS::NetworkManager::TransitGatewayRegistration', - 'AWS::OpenSearch::Domain', - 'AWS::Pinpoint::ApplicationSettings', - 'AWS::Pinpoint::Segment', - 'AWS::QLDB::Ledger', - 'AWS::RDS::DBCluster', - 'AWS::RDS::DBClusterSnapshot', - 'AWS::RDS::DBInstance', - 'AWS::RDS::DBSecurityGroup', - 'AWS::RDS::DBSnapshot', - 'AWS::RDS::DBSubnetGroup', - 'AWS::RDS::EventSubscription', - 'AWS::RDS::GlobalCluster', - 
'AWS::RUM::AppMonitor', - 'AWS::Redshift::Cluster', - 'AWS::Redshift::ClusterParameterGroup', - 'AWS::Redshift::ClusterSecurityGroup', - 'AWS::Redshift::ClusterSnapshot', - 'AWS::Redshift::ClusterSubnetGroup', - 'AWS::Redshift::EventSubscription', - 'AWS::ResilienceHub::ResiliencyPolicy', - 'AWS::RoboMaker::RobotApplication', - 'AWS::RoboMaker::RobotApplicationVersion', - 'AWS::RoboMaker::SimulationApplication', - 'AWS::Route53::HostedZone', - 'AWS::Route53RecoveryControl::Cluster', - 'AWS::Route53RecoveryControl::ControlPanel', - 'AWS::Route53RecoveryControl::RoutingControl', - 'AWS::Route53RecoveryControl::SafetyRule', - 'AWS::Route53RecoveryReadiness::Cell', - 'AWS::Route53RecoveryReadiness::ReadinessCheck', - 'AWS::Route53RecoveryReadiness::RecoveryGroup', - 'AWS::Route53RecoveryReadiness::ResourceSet', - 'AWS::Route53Resolver::FirewallDomainList', - 'AWS::Route53Resolver::ResolverEndpoint', - 'AWS::Route53Resolver::ResolverRule', - 'AWS::Route53Resolver::ResolverRuleAssociation', - 'AWS::S3::AccountPublicAccessBlock', - 'AWS::S3::Bucket', - 'AWS::S3::MultiRegionAccessPoint', - 'AWS::S3::StorageLens', - 'AWS::SES::ConfigurationSet', - 'AWS::SES::ContactList', - 'AWS::SES::ReceiptFilter', - 'AWS::SES::ReceiptRuleSet', - 'AWS::SES::Template', - 'AWS::SNS::Topic', - 'AWS::SQS::Queue', - 'AWS::SSM::AssociationCompliance', - 'AWS::SSM::FileData', - 'AWS::SSM::ManagedInstanceInventory', - 'AWS::SSM::PatchCompliance', - 'AWS::SageMaker::CodeRepository', - 'AWS::SageMaker::Model', - 'AWS::SageMaker::NotebookInstanceLifecycleConfig', - 'AWS::SageMaker::Workteam', - 'AWS::SecretsManager::Secret', - 'AWS::ServiceCatalog::CloudFormationProduct', - 'AWS::ServiceCatalog::CloudFormationProvisionedProduct', - 'AWS::ServiceCatalog::Portfolio', - 'AWS::ServiceDiscovery::HttpNamespace', - 'AWS::ServiceDiscovery::PublicDnsNamespace', - 'AWS::ServiceDiscovery::Service', - 'AWS::Shield::Protection', - 'AWS::ShieldRegional::Protection', - 'AWS::StepFunctions::Activity', - 'AWS::StepFunctions::StateMachine', - 'AWS::Transfer::Workflow', - 'AWS::WAF::RateBasedRule', - 'AWS::WAF::Rule', - 'AWS::WAF::RuleGroup', - 'AWS::WAF::WebACL', - 'AWS::WAFRegional::RateBasedRule', - 'AWS::WAFRegional::Rule', - 'AWS::WAFRegional::RuleGroup', - 'AWS::WAFRegional::WebACL', - 'AWS::WAFv2::IPSet', - 'AWS::WAFv2::ManagedRuleSet', - 'AWS::WAFv2::RegexPatternSet', - 'AWS::WAFv2::RuleGroup', - 'AWS::WAFv2::WebACL', - 'AWS::WorkSpaces::ConnectionAlias', - 'AWS::WorkSpaces::Workspace', - 'AWS::XRay::EncryptionConfig', - ] diff --git a/src/azul/csp.py b/src/azul/csp.py deleted file mode 100644 index 1e6d0fe073..0000000000 --- a/src/azul/csp.py +++ /dev/null @@ -1,215 +0,0 @@ -import base64 -from collections import ( - defaultdict, -) -import logging -import re -import secrets -from typing import ( - Self, -) - -import attrs -from more_itertools import ( - only, - prepend, -) - -from azul import ( - R, -) -from azul.strings import ( - single_quote as sq, -) - -log = logging.getLogger(__name__) - - -@attrs.frozen -class CSP: - directives: dict[str, list[str]] - - @classmethod - def for_azul(cls, nonce: str | None = None) -> Self: - self, none, data = sq('self'), sq('none'), 'data:' - nonce = [] if nonce is None else [sq('nonce-' + nonce)] - return cls({ - 'default-src': [self], - 'img-src': [self, data], - 'script-src': [self, *nonce], - 'style-src': [self, *nonce], - 'frame-ancestors': [none], - 'form-action': [self] - }) - - @classmethod - def new_nonce(cls) -> str: - """ - A random nonce for use in a CSP. 
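The nonce generation here relies on the encoding being fixed-size: 32 random bytes always become 44 base64 characters ending in exactly one pad character, so stripping the `=` leaves the 43 characters that `nonce_re` further down expects. In isolation::

    import base64
    import secrets

    nonce = base64.b64encode(secrets.token_bytes(32)).decode('ascii')
    # 32 bytes is two short of a multiple of three, so the encoding
    # always ends in exactly one '=' pad character
    assert len(nonce) == 44 and nonce.endswith('=')
    assert not nonce.endswith('==')
    nonce = nonce.rstrip('=')
    assert len(nonce) == 43   # matches the {43} quantifier in nonce_re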
- """ - return base64.b64encode(secrets.token_bytes(32)).decode('ascii').rstrip('=') - - @classmethod - def parse(cls, csp: str) -> Self: - """( - - Parse the given CSP or raise RequirementError if it is not syntactically - valid against the specification at https://www.w3.org/TR/CSP2. - - >>> from azul.doctests import ( - ... assert_json, - ... ) - - >>> def parse(s): return CSP.parse(s).directives - - A valid CSP: - - >>> valid_csp = "img-src 'self' data:;frame-ancestors 'none'" - >>> assert_json(parse(valid_csp)) - { - "img-src": [ - "'self'", - "data:" - ], - "frame-ancestors": [ - "'none'" - ] - } - - Insignificant whitespace is removed: - - >>> fluffy_csp = " \timg-src\t'self' data:\t;\tframe-ancestors\t 'none' \t" - >>> parse(valid_csp) == parse(fluffy_csp) - True - - Multiple multiple directives of the same name are consolidated: - - >>> assert_json(parse("img-src data:;img-src 'self':")) - { - "img-src": [ - "data:", - "'self':" - ] - } - - Invalid CSPs: - - >>> parse(";") - Traceback (most recent call last): - ... - AssertionError: R('Invalid directive', '') - - >>> parse('img_src;') - Traceback (most recent call last): - ... - AssertionError: R('Invalid directive', 'img_src') - - >>> parse('img-src a,b') - Traceback (most recent call last): - ... - AssertionError: R('Invalid directive', 'img-src a,b') - """ - # https://www.w3.org/TR/CSP2/#policy-syntax - directive_re = re.compile(r'[ \t]*([a-zA-Z0-9-]+)' - # Space, tab and any visible character - # (0x21-0xFE) except for comma (0x2C) or - # semicolon (0x3B). - r'(?:[ \t]([ \t\x21-\x2B\x2D-\x3A\x3C-\xFE]*))?') - wsp_re = re.compile(r'[ \t]+') - directives: dict[str, list[str]] = defaultdict(list) - for directive in csp.split(';'): - match = directive_re.fullmatch(directive) - assert match is not None, R('Invalid directive', directive) - name, values = match.groups() - values = [] if values is None else filter(None, wsp_re.split(values)) - directives[name].extend(values) - return cls(directives) - - # Matches only Azul nonces, specifically - nonce_re = re.compile(sq(r'nonce-([a-zA-Z0-9+/]{43})')) - - def validate(self): - """ - Validate the directive values against a subset of the Source List - grammar from the specification. Of that grammar, only the productions - used in CSPs for Azul are supported. - - >>> def validate(s): return CSP.parse(s).validate() - - >>> valid = ('0a+/' * 11)[:43] - >>> validate(f"script-src 'self' 'nonce-{valid}'") - - Disallowed characters in nonce: - - >>> invalid = valid.replace('+','*') - >>> validate(f"script-src 'self' 'nonce-{invalid}'") - Traceback (most recent call last): - ... - AssertionError: R('Invalid value', "'nonce-0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*'") - - Nonce is too short: - - >>> invalid = valid[:-1] - >>> validate(f"script-src 'self' 'nonce-{invalid}'") - Traceback (most recent call last): - ... - AssertionError: R('Invalid value', "'nonce-0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a'") - - Nonce is too long: - - >>> invalid = valid + '/' - >>> validate(f"script-src 'self' 'nonce-{invalid}'") - Traceback (most recent call last): - ... - AssertionError: R('Invalid value', "'nonce-0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/'") - - Other invalid combinations: - - >>> validate("frame-ancestors 'none' 'none'") - Traceback (most recent call last): - ... - AssertionError: R("'none' must appear alone", ["'none'", "'none'"]) - - >>> validate("frame-ancestors 'self' 'none'") - Traceback (most recent call last): - ... 
- AssertionError: R("'none' must appear alone", ["'self'", "'none'"]) - - >>> validate("img-src 'self' data: 'self'") - Traceback (most recent call last): - ... - AssertionError: R('Duplicated value', ["'self'", 'data:', "'self'"]) - """ - self_, none, data = sq('self'), sq('none'), 'data:' - value_res = prepend(self.nonce_re.pattern, map(re.escape, [self_, none, data])) - value_re = re.compile('|'.join(value_res)) - for name, values in self.directives.items(): - for value in values: - match = value_re.fullmatch(value) - assert match is not None, R('Invalid value', value) - assert values == [none] or none not in values, R( - f'{none} must appear alone', values) - assert len(values) == len(set(values)), R('Duplicated value', values) - - def nonce(self) -> str | None: - """ - Extract the Azul nonce from this CSP, if present. If there are multiple - occurrances of a nonce, they must all be equal. - """ - return only(set( - value - for name, values in self.directives.items() - for value in values - if self.nonce_re.fullmatch(value) is not None - )) - - def __str__(self) -> str: - """ - >>> s = "img-src 'self' data:;frame-ancestors 'none'" - >>> s == str(CSP.parse(s)) - True - """ - return ';'.join( - ' '.join(value for value in prepend(name, values)) - for name, values in self.directives.items() - ) diff --git a/src/azul/digests.py b/src/azul/digests.py deleted file mode 100644 index 99e089aca3..0000000000 --- a/src/azul/digests.py +++ /dev/null @@ -1,48 +0,0 @@ -import base64 -import pickle -from typing import ( - Any, - Literal, - TYPE_CHECKING, -) - -import attrs -import resumablesha256 - -from azul import ( - R, -) - -if TYPE_CHECKING: - class Hasher: - - def hexdigest(self) -> str: ... - - def update(self, data: bytes, /) -> None: ... -else: - Hasher = Any - - -def get_resumable_hasher(digest_type: str) -> Hasher: - assert digest_type == 'sha256', R('Only sha256 is currently supported') - return resumablesha256.sha256() - - -def hasher_to_str(hasher: Hasher) -> str: - return base64.b64encode(pickle.dumps(hasher)).decode('ascii') - - -def hasher_from_str(s: str) -> Hasher: - return pickle.loads(base64.b64decode(s)) - - -@attrs.frozen(kw_only=True) -class Digest: - """ - A hexadecimal digest of a sequence of bytes, and the type of algorithm used - to produce said digest. The set of supported algorithms is limited to those - we believe to present an acceptable risk of hash collisions. 
- """ - - type: Literal['sha256', 'sha1', 'md5'] - value: str diff --git a/src/azul/docker.py b/src/azul/docker.py deleted file mode 100644 index c5e0c64aa8..0000000000 --- a/src/azul/docker.py +++ /dev/null @@ -1,740 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from base64 import ( - b64decode, - b64encode, - urlsafe_b64encode, -) -from collections import ( - defaultdict, -) -from contextlib import ( - contextmanager, -) -from hashlib import ( - sha1, - sha256, -) -import json -import logging -import os -import re -import subprocess -import tempfile -from typing import ( - Any, - Iterable, - Literal, - Optional, - Self, - TypedDict, - cast, -) - -import attrs -import docker -from docker.models.images import ( - Image, -) -from dxf import ( - DXF, - DXFBase, -) -from more_itertools import ( - one, - padded, -) -import requests - -from azul import ( - R, - cache, - cached_property, - config, -) -from azul.types import ( - JSONs, - json_int, - json_str, -) - -log = logging.getLogger(__name__) - - -@attrs.define(frozen=True) -class ImageRef(metaclass=ABCMeta): - """ - A fully qualified reference to a Docker image in a registry. - - Does not support any abbreviations such as omitting the registry (defaulting - to ``docker.io``), username (defaulting to ``library``) or tag (defaulting - to ``latest``). - """ - - #: The part before the first slash. This is usually the domain name of image - #: registry e.g., ``"docker.io"`` - registry: str - - #: The part between the first and second slash. This is usually the name of - #: the user or organisation owning the image. It can also be a generic term - #: such as ``"library"``. - username: str - - #: The part after the second slash, split on the remaining slashes. Will - #: have at least one element. - repository: tuple[str, ...] - - @classmethod - def parse(cls, image_ref: str) -> 'ImageRef': - """ - >>> ImageRef.parse('2@1') - DigestImageRef(registry='docker.io', username='library', repository=('2',), digest='1') - >>> ImageRef.parse('3/2:1') - TagImageRef(registry='docker.io', username='3', repository=('2',), tag='1') - >>> ImageRef.parse('4/3/2:1') - TagImageRef(registry='4', username='3', repository=('2',), tag='1') - >>> ImageRef.parse('5/4/3/2:1') - TagImageRef(registry='5', username='4', repository=('3', '2'), tag='1') - >>> ImageRef.parse('localhost:5000/docker.io/ucscgi/azul-pycharm:2023.3.4-15') - ... # doctest: +NORMALIZE_WHITESPACE - TagImageRef(registry='localhost:5000', - username='docker.io', - repository=('ucscgi', 'azul-pycharm'), - tag='2023.3.4-15') - """ - if '@' in image_ref: - return DigestImageRef.parse(image_ref) - else: - return TagImageRef.parse(image_ref) - - @classmethod - def _create(cls, name: str, **kwargs) -> Self: - name = name.split('/') - if len(name) == 1: - registry, username, repository = 'docker.io', 'library', name - elif len(name) == 2: - registry, (username, *repository) = 'docker.io', name - elif len(name) > 2: - registry, username, *repository = name - else: - assert False - # noinspection PyArgumentList - return cls(registry=registry, - username=username, - repository=tuple(repository), - **kwargs) - - @property - def name(self): - """ - The name of the image, starting with the registry, up to, but not - including, the tag. - """ - return '/'.join((self.registry, self.relative_name)) - - @property - def relative_name(self): - """ - The name of the image relative to the registry. 
- """ - return '/'.join((self.username, *self.repository)) - - @property - def registry_host(self): - """ - Same as :py:attr:``registry`` with hacks for DockerHub. - - https://github.com/docker/cli/issues/3793#issuecomment-1269051403 - """ - registry = self.registry - return 'registry-1.docker.io' if registry == 'docker.io' else registry - - def with_digest(self, digest: str) -> 'DigestImageRef': - return DigestImageRef.create(self.name, digest) - - def with_tag(self, tag: str) -> 'TagImageRef': - return TagImageRef.create(self.name, tag) - - ecr_registry_host_re = re.compile(r'[\d]+\.dkr\.ecr\.[^.]+\.amazonaws\.com') - - @property - def is_mirrored(self) -> bool: - return self.ecr_registry_host_re.fullmatch(self.registry_host) is not None - - def port_to(self, registry: str) -> Self: - """ - >>> ref = ImageRef.parse('a/b/c:d') - >>> ref.port_to('e') - TagImageRef(registry='e', username='a', repository=('b', 'c'), tag='d') - >>> ref.port_to('') - TagImageRef(registry='a', username='b', repository=('c',), tag='d') - >>> ref.port_to('a') - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R('Reference already ported to registry', - TagImageRef(registry='a', username='b', repository=('c',), tag='d'), - 'a') - """ - if registry: - assert self.registry != registry, R( - 'Reference already ported to registry', - self, registry) - other = type(self).parse(registry + '/' + str(self)) - assert isinstance(other, type(self)) - return other - else: - return self - - def port_from(self, registry: str) -> Self: - """ - >>> ref = ImageRef.parse('a/b/c:d') - >>> ref.port_to('e').port_from('e') - TagImageRef(registry='a', username='b', repository=('c',), tag='d') - >>> ref.port_to('').port_from('') - TagImageRef(registry='a', username='b', repository=('c',), tag='d') - >>> ref.port_from('e') - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R('Reference does not use the registry to port from', - TagImageRef(registry='a', username='b', repository=('c',), tag='d'), 'e') - """ - if registry: - assert self.registry == registry, R( - 'Reference does not use the registry to port from', - self, registry) - other = type(self).parse(str(self).removeprefix(registry + '/')) - assert isinstance(other, type(self)) - return other - else: - return self - - @property - def auth_server_url(self) -> str: - """ - The Docker client tracks credentials in ~/.docker/config.json using the - URL or hostname of the server requesting authentication. Similarly, the - credential helpers expect the same value on stdandard input. This method - returns that value for this repository. - """ - if self.registry == 'docker.io': - return 'https://index.docker.io/v1/' - else: - return self.registry_host - - @property - def tf_repository(self): - """ - A string suitable for identifying (in Terraform config) the ECR - repository resource holding this image. - """ - hash = urlsafe_b64encode(sha1(self.name.encode()).digest()).decode()[:-1] - return 'repository_' + hash - - @property - def tf_alnum_repository(self): - """ - An alphanumeric string suitable for identifying (in Terraform config) - the ECR repository resource holding this image. Unlike `tf_repository`, - the string may only contain characters in [0-9a-zA-Z]. - """ - return 'repository' + sha1(self.name.encode()).hexdigest() - - @property - def tf_image(self): - """ - A string suitable for identifying (in Terraform config) any resource - specific to this image. 
- """ - hash = urlsafe_b64encode(sha1(str(self).encode()).digest()).decode()[:-1] - return 'image_' + hash - - @property - @abstractmethod - def qualifier(self) -> str: - raise NotImplementedError - - -@attrs.define(frozen=True) -class DigestImageRef(ImageRef): - """ - A fully qualified and stable reference to a Docker image in a registry. - """ - - #: The part after the '@', a hash of the image manifest. While it uniquely - #: identifies an image within a registry, it is not consistent accross - #: registries. The same image can have different digests in different - #: registries. - digest: str - - @classmethod - def parse(cls, image_ref: str) -> Self: - name, digest = image_ref.split('@') - return cls.create(name, digest) - - @classmethod - def create(cls, name: str, digest: str) -> Self: - return super()._create(name, digest=digest) - - def __str__(self) -> str: - """ - The inverse of :py:meth:`parse`. - """ - return self.name + '@' + self.digest - - @property - def qualifier(self) -> str: - return self.digest - - -@attrs.define(frozen=True) -class TagImageRef(ImageRef): - """ - A fully qualified reference to a tagged Docker image in a registry. - """ - - #: The part after the colon in an image name. This is the name of a tag - #: associated with the image. Tags refer to digests and are mutable. For a - #: stable references to images in a registry use :py:class:`DigestImageRef`. - tag: str - - @classmethod - def parse(cls, image_ref: str) -> Self: - # A colon in the first part of the name might separate host and port - name, _, tag = image_ref.rpartition(':') - return cls.create(name, tag) - - @classmethod - def create(cls, name: str, tag: str) -> Self: - return super()._create(name, tag=tag) - - def __str__(self) -> str: - """ - The inverse of :py:meth:`parse`. 
- """ - return self.name + ':' + self.tag - - @property - def qualifier(self) -> str: - return self.tag - - -@attrs.define(frozen=True) -class Platform: - os: str - arch: str - variant: Optional[str] - - def normalize(self) -> Self: - os = _normalize_os(self.os) - arch, variant = _normalize_arch(self.arch, self.variant) - return attrs.evolve(self, os=os, arch=arch, variant=variant) - - @classmethod - def parse(cls, platform: str) -> Self: - os, arch, variant = padded(platform.split('/'), None, 3) - assert os, R('Invalid operating system', platform) - assert arch, R('Invalid architecture', platform) - assert variant is None or variant, R('Invalid variant', platform) - return cls(os=os, arch=arch, variant=variant) - - @classmethod - def from_json(cls, platform, config: bool = False) -> Self: - def case(s): - return s.capitalize() if config else s - - return cls(os=platform[case('os')], - arch=platform[case('architecture')], - variant=platform.get(case('variant'))) - - def __str__(self) -> str: - result = [self.os, self.arch] - if self.variant is not None: - result.append(self.variant) - return '/'.join(result) - - -images_by_alias = { - alias: TagImageRef.parse(spec['ref']) - for alias, spec in config.docker_images.items() -} - -images = images_by_alias.values() - -platforms = list(map(Platform.parse, config.docker_platforms)) - -images_by_name: dict[str, list] = defaultdict(list) -for image in images: - images_by_name[image.name].append(image) -del image - -images_by_tf_repository: dict[tuple[str, str], list[TagImageRef]] = { - (name, one(set(image.tf_repository for image in images))): images - for name, images in images_by_name.items() -} - - -# https://github.com/containerd/containerd/blob/1fbd70374134b891f97ce19c70b6e50c7b9f4e0d/platforms/database.go#L62 - -def _normalize_os(os: str) -> str: - os = os and os.lower() - if os == 'macos': - os = 'darwin' - return os - - -# https://github.com/containerd/containerd/blob/1fbd70374134b891f97ce19c70b6e50c7b9f4e0d/platforms/database.go#L76 - -def _normalize_arch(arch: str, - variant: Optional[str] - ) -> tuple[str, Optional[str]]: - arch = arch.lower() - variant = variant and variant.lower() - if arch == 'i386': - arch = '386' - variant = None - elif arch in ('x86_64', 'x86-64', 'amd64'): - arch = 'amd64' - if variant == 'v1': - variant = None - elif arch in ('aarch64', 'arm64'): - arch = 'arm64' - if variant in ('8', 'v8'): - variant = None - elif arch == 'armhf': - arch = 'arm' - variant = 'v7' - elif arch == 'armel': - arch = 'arm' - variant = 'v6' - elif arch == 'arm': - if variant in (None, '7'): - variant = 'v7' - elif variant in ('5', '6', '8'): - variant = 'v' + variant - return arch, variant - - -class Gist(TypedDict): - """ - Represents an image manifest or a blob, or any Docker artifact with a digest - """ - - #: A hash of the content, typically starting in `sha256:` - digest: str - - -class ImageGist(Gist): - """ - A Docker image - """ - #: Type of system to run the image on, as in `os/arch` or `os/arch/variant` - platform: str - - #: The hash of the image config JSON, most likely starting in `sha256:`. - #: This is consistent accross registries and includes the hashes of the - #: uncompressed, binary content of the image, and is commonly referred to as - #: the "image ID". 
- id: str - - -class IndexImageGist(Gist): - """ - A multi-platform image, also known as an image index - """ - #: While the inherited ``digest`` property pertains to the original - #: registry, this property contains the digest of the image in the mirror - #: registry, i.e. ECR. Even though the digests of the platform-specific - #: parts of a multi-platform image are the same in both registries, the - #: digest of the mirrored multi-platform image usually differs from the - #: original because 1) the mirror only includes a subset of the original - #: parts and 2) the digest algorithm is generally sensitive to insignificant - #: JSON differences in whitespace or property order. - mirror_digest: str - - #: The images in the list, by platform (`os/arch` or `os/arch/variant`) - parts: dict[str, ImageGist] - - -@attrs.define(frozen=True, slots=False) -class Repository: - image_ref: ImageRef - - @cached_property - def host(self) -> str: - return self.image_ref.registry_host - - @cached_property - def name(self) -> str: - return self.image_ref.relative_name - - @classmethod - def get_gists(cls) -> dict[str, ImageGist | IndexImageGist]: - gists = {} - for alias, ref in images_by_alias.items(): - log.info('Getting information for %r (%s)', alias, ref) - repository = cls(ref) - digest = repository.get_tag(ref.tag) - gists[str(ref)] = repository.get_gist(digest) - return gists - - def get_tag(self, tag: str) -> str: - """ - Return the manifest digest associated with the given tag. - """ - log.info('Getting tag %r', tag) - digest, _ = self._client.head_manifest_and_response(tag) - return digest - - def get_gist(self, digest: str) -> ImageGist | IndexImageGist: - """ - Return the manifest for the given digest. - """ - log.info('Getting manifest %r', digest) - manifest, _ = self._client.get_manifest_and_response(digest) - manifest = json.loads(manifest) - match manifest['mediaType']: - case ('application/vnd.oci.image.index.v1+json' - | 'application/vnd.docker.distribution.manifest.list.v2+json'): - parts = self._get_mirrored_parts(manifest['manifests']) - mirror_manifest = ImageIndexManifest.create({ - platform: ImageIndexPart(digest=part['digest'], size=size) - for platform, (part, size) in parts.items() - }) - return { - 'digest': digest, - 'mirror_digest': mirror_manifest.digest, - 'parts': {str(platform): part for platform, (part, size) in parts.items()} - } - case ('application/vnd.docker.distribution.manifest.v2+json' - | 'application/vnd.oci.image.manifest.v1+json'): - config_digest = manifest['config']['digest'] - config = json.loads(self.get_blob(config_digest)) - return { - 'digest': digest, - 'id': config_digest, - 'platform': str(Platform.from_json(config).normalize()) - } - case media_type: - raise NotImplementedError(media_type) - - def _get_mirrored_parts(self, - manifests: JSONs - ) -> dict[Platform, tuple[ImageGist, int]]: - gists = {} - for manifest in manifests: - platform = Platform.from_json(manifest['platform']).normalize() - if platform in platforms: - digest, size = json_str(manifest['digest']), json_int(manifest['size']) - gist = self.get_gist(digest) - assert gist.get('platform') == str(platform), R( - 'Inconsistent platform between manifest and manifest list', - manifest, gist) - gists[platform] = cast(ImageGist, gist), size - return gists - - def get_blob(self, digest: str) -> bytes: - """ - Return the content for the given digest. 
- """ - log.info('Getting blob %r', digest) - chunks = self._client.pull_blob(digest) - return b''.join(chunks) - - @cached_property - def _client(self): - return DXF(host=self.host, - repo=self.name, - auth=self._dxf_auth, - insecure=self.host.startswith('localhost:') or self.host == 'localhost') - - def _dxf_auth(self, dxf: DXFBase, response: requests.Response): - username, password = self._auth - dxf.authenticate(username=username, - password=password, - response=response) - - @cached_property - def _auth(self) -> tuple[str, str]: - auth_server_url = self.image_ref.auth_server_url - with open(os.path.expanduser('~/.docker/config.json')) as f: - config = json.load(f) - try: - creds_store = config['credsStore'] - except KeyError: - return self._decode_auth(config['auths'][auth_server_url]['auth']) - else: - command = 'docker-credential-' + creds_store - output = subprocess.check_output(args=[command, 'get'], - input=auth_server_url.encode('ascii')) - credentials = json.loads(output) - return credentials['Username'], credentials['Secret'] - - @property - def encoded_auth(self) -> str: - return self._encode_auth(*self._auth) - - def _decode_auth(self, auth: str) -> tuple[str, str]: - auth = b64decode(auth.encode('ascii')).decode() - username, _, secret = auth.partition(':') - return username, secret - - def _encode_auth(self, username: str, secret: str) -> str: - auth = username + ':' + secret - return b64encode(auth.encode()).decode('ascii') - - @classmethod - @contextmanager - def temporary_auth_file(cls, *refs: ImageRef): - """ - While some utilities in the Docker/OCI ecosystem are able to read - plain-text credentials from the Docker client's configuration file - (~/.docker/config.json), they often lack support for the credential - helpers that can be configured there. Removing the credStore entry from - that configiguration file would disable these helpers, but a prominent - Docker client distribution (Docker Desktop for macOS and Windows) - reinserts the entry every time it starts up. - - This context manager provides a temporary containers-auth.json [1] with - plain-text credentials for the repositories hosting the given images. - The credentials are obtained by extracting plain-text credentials from - ~/.docker/config.json or by invoking the credStore helper configured - there. The path to the temporary file is passed to the context on entry - and the file is deleted when the context is exited. 
- - [1] https://github.com/containers/image/blob/main/docs/containers-auth.json.5.md - """ - with tempfile.NamedTemporaryFile() as auth_file: - auths = { - 'auths': { - ref.auth_server_url: { - 'auth': cls(ref).encoded_auth - } - for ref in refs - }, - } - auth_file.write(json.dumps(auths).encode()) - auth_file.flush() - yield auth_file.name - - -@attrs.frozen(kw_only=True) -class ImageIndexPart: - digest: str - size: int - - -@attrs.frozen(kw_only=True) -class ImageIndexManifest: - json: str - digest: str - - @classmethod - def create(cls, parts: dict[Platform, ImageIndexPart]) -> Self: - manifest = { - 'schemaVersion': 2, - 'mediaType': 'application/vnd.docker.distribution.manifest.list.v2+json', - 'manifests': [ - { - 'mediaType': 'application/vnd.docker.distribution.manifest.v2+json', - 'size': part.size, - 'digest': part.digest, - 'platform': { - 'architecture': platform.arch, - 'os': platform.os - } - } - for platform, part in parts.items() - ] - } - manifest = json.dumps(manifest, indent=4) - digest = 'sha256:' + sha256(manifest.encode()).hexdigest() - return cls(json=manifest, digest=digest) - - -def pull_docker_image(ref: ImageRef) -> Image: - return _push_or_pull(ref, 'pull') - - -def push_docker_image(ref: ImageRef) -> Image: - return _push_or_pull(ref, 'push') - - -def _push_or_pull(ref: ImageRef, - direction: Literal['push'] | Literal['pull'] - ) -> Image: - log.info('%sing image %r …', direction.capitalize(), ref) - client = docker.client.from_env() - # Despite its name, the `tag` keyword argument can be a digest, too - method = getattr(client.api, direction) - output = method(ref.name, tag=ref.qualifier, stream=True) - log_lines(ref, direction, output) - log.info('%sed image %r', direction.capitalize(), ref) - return client.images.get(str(ref)) - - -def log_lines(context: Any, command: str, output: Iterable[bytes]): - for line in output: - log.debug('%s: docker %s %s', context, command, line.decode().strip()) - - -def get_docker_image_gist(ref: TagImageRef) -> ImageGist | IndexImageGist: - return get_docker_image_gists()[str(ref)] - - -def get_docker_image_gists() -> dict[str, ImageGist | IndexImageGist]: - with open(config.docker_image_gists_path) as f: - return json.load(f) - - -@cache -def resolve_docker_image_for_launch(alias: str) -> str: - """ - Return an image reference that can be used to launch a container from the - image with the given alias. The alias is the top level key in the JSON - object contained in the environment variable `azul_docker_images`. - """ - ref_to_pull, gist = resolve_docker_image_for_pull(alias) - image = pull_docker_image(ref_to_pull) - # In either case, the verification below ensures that the image we pulled - # has the expected ID. - try: - parts = cast(IndexImageGist, gist)['parts'] - except KeyError: - # For single-platform images, this is straightforward. - assert image.id == cast(ImageGist, gist)['id'] - else: - # To determine the expected ID for images that are part of a multi- - # platform image aka "manifest list" aka "image index", we need to know - # what specific platform was pulled since we left it to Docker to - # determine the best match. - platform = Platform.from_json(image.attrs, config=True).normalize() - assert image.id == parts[str(platform)]['id'] - # Returning the image ID means that the container will be launched using - # exactly the image we just pulled and verified.
- return image.id - - -def resolve_docker_image_for_pull(alias: str - ) -> tuple[DigestImageRef, ImageGist | IndexImageGist]: - """ - Return a reference to, and the gist of, the image with the given alias, for - the purpose of pulling said image. - """ - ref = TagImageRef.parse(config.docker_images[alias]['ref']) - log.info('Resolving %r image %r …', alias, ref) - gist = get_docker_image_gist(ref) - ref = ref.port_to(config.docker_registry) - # For multi-arch images, we need to use the digest of the mirrored image, if - # we're pulling from a mirror. For single-arch images, the digest is the - # same between the upstream and mirror registries. - if 'parts' in gist and ref.is_mirrored: - digest = cast(IndexImageGist, gist)['mirror_digest'] - else: - digest = gist['digest'] - ref = ref.with_digest(digest) - log.info('Resolved %r image to %r', alias, ref) - return ref, gist diff --git a/src/azul/doctests.py b/src/azul/doctests.py deleted file mode 100644 index 328ed91d3a..0000000000 --- a/src/azul/doctests.py +++ /dev/null @@ -1,27 +0,0 @@ -import json - -from azul.types import ( - AnyJSON, -) - - -def assert_json(j: AnyJSON): - """ - Makes it easier to assert JSON in doctests. The argument is checked for - validity and pretty-printed with an indent of four spaces. - - >>> assert_json(dict(foo=[None], bar=42)) - { - "foo": [ - null - ], - "bar": 42 - } - - Compare this with a traditional doctest assertion which has to fit on a - single line: - - >>> dict(foo=[None], bar=42) - {'foo': [None], 'bar': 42} - """ - print(json.dumps(j, indent=4)) diff --git a/src/azul/drs.py b/src/azul/drs.py deleted file mode 100644 index ff392f1b0e..0000000000 --- a/src/azul/drs.py +++ /dev/null @@ -1,320 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections import ( - namedtuple, -) -from collections.abc import ( - Mapping, - Sequence, -) -from enum import ( - Enum, -) -import json -import logging -import time -from typing import ( - Self, -) - -import attr -from furl import ( - furl, -) -from more_itertools import ( - one, -) -import urllib3.request - -from azul import ( - R, - cache, - cached_property, - mutable_furl, -) -from azul.http import ( - HasCachedHttpClient, - LimitedRetryHttpClient, - Propagate429HttpClient, -) -from azul.types import ( - MutableJSON, - json_dict, - json_list, - json_str, -) - -log = logging.getLogger(__name__) - - -def drs_object_uri(*, - base_url: furl, - path: Sequence[str], - params: Mapping[str, str] - ) -> mutable_furl: - assert ':' not in base_url.netloc - return furl(url=base_url, scheme='drs', path=path, args=params) - - -def drs_object_url_path(*, object_id: str, access_id: str | None = None) -> str: - """ - >>> drs_object_url_path(object_id='abc') - '/ga4gh/drs/v1/objects/abc' - - >>> drs_object_url_path(object_id='abc', access_id='123') - '/ga4gh/drs/v1/objects/abc/access/123' - """ - drs_url = '/ga4gh/drs/v1/objects' - return '/'.join(( - drs_url, - object_id, - *(('access', access_id) if access_id else ()) - )) - - -def dos_object_url_path(object_id: str) -> str: - return f'/ga4gh/dos/v1/dataobjects/{object_id}' - - -class AccessMethod(namedtuple('AccessMethod', 'scheme replica'), Enum): - https = 'https', 'aws' - gs = 'gs', 'gcp' - - def __str__(self) -> str: - return self.name - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True) -class Access: - method: AccessMethod - url: str - headers: Mapping[str, str] | None = None - - -class DRSURI(metaclass=ABCMeta): - - @classmethod - def parse(cls, drs_uri: str) -> 'DRSURI': - prefix = 'drs://' -
assert drs_uri.startswith(prefix), R('Invalid DRS URI scheme', drs_uri) - # "The colon character is not allowed in a hostname-based DRS URI". - # - # https://ga4gh.github.io/data-repository-service-schemas/preview/develop/docs/#_drs_uris - # - subcls = CompactDRSURI if drs_uri.find(':', len(prefix)) >= 0 else RegularDRSURI - return subcls.parse(drs_uri) - - @abstractmethod - def to_url(self, client: 'DRSClient', access_id: str | None = None) -> furl: - """ - Translate the DRS URI into a DRS URL. All query params included in the - DRS URI (e.g. '{drs_uri}?version=123') will be carried over to the DRS URL. - """ - raise NotImplementedError - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True, slots=True) -class RegularDRSURI(DRSURI): - uri: furl - - def __attrs_post_init__(self): - assert self.uri.scheme == 'drs', self.uri - - @classmethod - def parse(cls, drs_uri: str) -> Self: - return cls(uri=furl(drs_uri)) - - def to_url(self, client: 'DRSClient', access_id: str | None = None) -> furl: - url = self.uri.copy().set(scheme='https') - url.set(path=drs_object_url_path(object_id=one(self.uri.path.segments), - access_id=access_id)) - return url - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True, slots=True) -class CompactDRSURI(DRSURI): - """ - So-called DRS "URIs" [1] for Compact Identifiers [2] are NOT URIs according - to RFC 3986 [3] so we can't use off-the-shelf URI parsers. - - [1] https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.3.0/docs/ - - [2] https://www.nature.com/articles/sdata201829 - - [3] https://datatracker.ietf.org/doc/html/rfc3986 - """ - namespace: str - accession: str - - def __attrs_post_init__(self): - assert '/' not in self.namespace and '?' not in self.accession, self - - @classmethod - def parse(cls, drs_uri: str) -> Self: - scheme, netloc = drs_uri.split('://', 1) - # Compact identifier-based URIs can be hard to parse when following - # RFC 3986, with the 'namespace:accession' part matching either the - # hier-part or path production depending on whether the optional provider - # code and following slash are included. - # - # https://ga4gh.github.io/data-repository-service-schemas/preview/develop/docs/#compact-identifier-based-drs-uris - # - prefix, accession = netloc.split(':', 1) - assert '/' not in prefix, R( - 'Compact identifiers with provider codes are not supported', drs_uri) - assert '?' not in accession, R( - 'Compact identifiers must not contain query parameters', drs_uri) - return cls(namespace=prefix, - accession=accession) - - def to_url(self, client: 'DRSClient', access_id: str | None = None) -> furl: - url = client.id_client.resolve(self.namespace, self.accession) - # The URL pattern registered at identifiers.org ought to replicate the - # DRS spec, but we have to re-create the path using the spec because the - # registered pattern does not support embedding the access ID.
- assert str(url.path) == drs_object_url_path(object_id=self.accession), R( - 'Unexpected DRS URL format', url) - url.set(path=drs_object_url_path(object_id=self.accession, access_id=access_id)) - return url - - -class IdentifiersDotOrgClient(HasCachedHttpClient): - - def _create_http_client(self) -> urllib3.request.RequestMethods: - return Propagate429HttpClient( - LimitedRetryHttpClient( - super()._create_http_client() - ) - ) - - def resolve(self, prefix: str, accession: str) -> mutable_furl: - namespace_id = self._prefix_to_namespace(prefix) - log.info('Resolved prefix %r to namespace ID %r', prefix, namespace_id) - resource_name, url_pattern = self._namespace_to_host(namespace_id) - log.info('Obtained URL pattern %r from resource %r', url_pattern, resource_name) - placeholder = '{$id}' - assert placeholder in url_pattern, R( - 'Missing accession placeholder in URL pattern', url_pattern) - url = url_pattern.replace(placeholder, accession) - return furl(url) - - _api_url = 'https://registry.api.identifiers.org/restApi/' - - @cache - def _prefix_to_namespace(self, prefix: str) -> str: - prefix_info = self._api_request('namespaces/search/findByPrefix', prefix=prefix) - href = json_str(json_dict(json_dict(prefix_info['_links'])['self'])['href']) - return furl(href).path.segments[-1] - - @cache - def _namespace_to_host(self, namespace_id: str) -> tuple[str, str]: - namespace_info = self._api_request('resources/search/findAllByNamespaceId', - id=namespace_id) - resources = json_list(json_dict(namespace_info['_embedded'])['resources']) - resource = json_dict(one(resources)) - return json_str(resource['name']), json_str(resource['urlPattern']) - - def _api_request(self, path: str, **args) -> MutableJSON: - url = furl(self._api_url).add(path=path, args=args) - response = self._http_client.request('GET', str(url)) - if response.status == 200: - return json.loads(response.data) - else: - raise DRSStatusException(url, response) - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True) -class DRSClient: - _http_client: urllib3.request.RequestMethods - - @cached_property - def id_client(self) -> IdentifiersDotOrgClient: - return IdentifiersDotOrgClient() - - def get_object(self, - drs_uri: str, - access_method: AccessMethod = AccessMethod.https - ) -> Access: - """ - Returns access to the content of the data object identified by the - given URI. The scheme of the URL in the returned access object depends - on the access method specified. 
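- - A usage sketch (hypothetical URI; requires network access):: - - client = DRSClient(http_client=urllib3.PoolManager()) - access = client.get_object('drs://example.org/1234', access_method=AccessMethod.https) - print(access.url)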
- """ - return self._get_object(drs_uri, access_method) - - def _get_object(self, drs_uri: str, access_method: AccessMethod) -> Access: - url = DRSURI.parse(drs_uri).to_url(self) - while True: - response = self._request(url) - if response.status == 200: - # Bundles are not supported therefore we can expect 'access_methods' - response_data = json_dict(json.loads(response.data)) - access_methods = map(json_dict, json_list(response_data['access_methods'])) - method = one(m for m in access_methods if m['type'] == access_method.scheme) - access_url = json_dict(method.get('access_url')) - access_id = json_str(method.get('access_id')) - if access_url is not None and access_id is not None: - # TDR quirkily uses the GS access method to provide both a - # GS access URL *and* an access ID that produces an HTTPS - # signed URL - # - # https://github.com/ga4gh/data-repository-service-schemas/issues/360 - # https://github.com/ga4gh/data-repository-service-schemas/issues/361 - assert access_method is AccessMethod.gs, R( - 'Unexpected access method', access_method) - return self._get_object_access(drs_uri, access_id, AccessMethod.https) - elif access_id is not None: - return self._get_object_access(drs_uri, access_id, access_method) - elif access_url is not None: - scheme = furl(access_url['url']).scheme - assert scheme == access_method.scheme, R( - 'Unexpected access URL scheme', scheme) - # We can't convert the signed URL into a furl object since - # the path can contain `%3A` which furl converts to `:` - return Access(method=access_method, - url=access_url['url']) - else: - assert False, R("'access_url' and 'access_id' are both missing") - elif response.status == 202: - wait_time = int(response.headers['retry-after']) - time.sleep(wait_time) - else: - raise DRSStatusException(url, response) - - def _get_object_access(self, - drs_uri: str, - access_id: str, - access_method: AccessMethod - ) -> Access: - url = DRSURI.parse(drs_uri).to_url(self, access_id) - while True: - response = self._request(url) - if response.status == 200: - response_data = json_dict(json.loads(response.data)) - scheme = furl(response_data['url']).scheme - assert scheme == access_method.scheme, R( - 'Unexpected access URL scheme', scheme) - access_url = json_str(response_data['url']) - headers = response_data.get('headers') - if headers is None: - access_headers = None - else: - access_headers = {k: json_str(v) for k, v in json_dict(headers).items()} - return Access(method=access_method, url=access_url, headers=access_headers) - elif response.status == 202: - wait_time = int(response.headers['retry-after']) - time.sleep(wait_time) - else: - raise DRSStatusException(url, response) - - def _request(self, url: furl) -> urllib3.HTTPResponse: - return self._http_client.request('GET', str(url), redirect=False) - - -class DRSStatusException(Exception): - - def __init__(self, url: furl, response: urllib3.HTTPResponse) -> None: - super().__init__(f'Unexpected response from {url}', - response.status, response.data) diff --git a/src/azul/dss.py b/src/azul/dss.py deleted file mode 100644 index bf1ca6baf5..0000000000 --- a/src/azul/dss.py +++ /dev/null @@ -1,45 +0,0 @@ -from datetime import ( - datetime, -) -import logging - -log = logging.getLogger(__name__) - -version_format = '%Y-%m-%dT%H%M%S.%fZ' - - -def new_version(): - # FIXME: DeprecationWarning for datetime methods in Python 3.12 - # https://github.com/DataBiosphere/azul/issues/5953 - return datetime.utcnow().strftime(version_format) - - -def validate_version(version: str): - """ - >>> 
validate_version('2018-10-18T150431.370880Z') - '2018-10-18T150431.370880Z' - - >>> validate_version('2018-10-18T150431.0Z') - Traceback (most recent call last): - ... - ValueError: ('2018-10-18T150431.0Z', '2018-10-18T150431.000000Z') - - >>> validate_version(' 2018-10-18T150431.370880Z') - Traceback (most recent call last): - ... - ValueError: time data ' 2018-10-18T150431.370880Z' does not match format '%Y-%m-%dT%H%M%S.%fZ' - - >>> validate_version('2018-10-18T150431.370880') - Traceback (most recent call last): - ... - ValueError: time data '2018-10-18T150431.370880' does not match format '%Y-%m-%dT%H%M%S.%fZ' - - >>> validate_version('2018-10-187150431.370880Z') - Traceback (most recent call last): - ... - ValueError: time data '2018-10-187150431.370880Z' does not match format '%Y-%m-%dT%H%M%S.%fZ' - """ - reparsed_version = datetime.strptime(version, version_format).strftime(version_format) - if version != reparsed_version: - raise ValueError(version, reparsed_version) - return version diff --git a/src/azul/enums.py b/src/azul/enums.py deleted file mode 100644 index 02e11d724e..0000000000 --- a/src/azul/enums.py +++ /dev/null @@ -1,14 +0,0 @@ -import enum -from typing import ( - TYPE_CHECKING, -) - -if TYPE_CHECKING: - def auto(): - """ - https://youtrack.jetbrains.com/issue/PY-53388/PyCharm-thinks-enumauto-needs-an-argument#focus=Comments-27-6302771.0-0 - """ - # noinspection PyArgumentList - return enum.auto() -else: - auto = enum.auto diff --git a/src/azul/es.py b/src/azul/es.py deleted file mode 100644 index 09cac18b76..0000000000 --- a/src/azul/es.py +++ /dev/null @@ -1,241 +0,0 @@ -import logging -from typing import ( - Any, - Collection, - Mapping, - Tuple, - cast, -) -from urllib.parse import ( - urlencode, -) - -from aws_requests_auth.boto_utils import ( - BotoAWSRequestsAuth, -) -from opensearchpy import ( - Connection, - OpenSearch, - Urllib3HttpConnection, -) -import requests -import requests.auth -import urllib3.request - -from azul import ( - R, - config, - lru_cache, -) -from azul.deployment import ( - aws, -) -from azul.logging import ( - es_log, - http_body_log_message, -) - -log = logging.getLogger(__name__) - - -class CachedBotoAWSRequestsAuth(BotoAWSRequestsAuth): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # We use the botocore session from Boto3 since it is pre-configured by - # envhook.py to use cached credentials for the AssumeRoleProvider. This - # avoids repeated entry of MFA tokens when running this code locally. - # noinspection PyProtectedMember - credentials = aws.boto3_session.get_credentials() - assert credentials is not None, R('Need credentials') - self._refreshable_credentials = credentials - - -class AzulConnection(Connection): - """ - Improves the request logging by the Elasticsearch client library with - respect to performance and utility. Most importantly, this class logs a - request *before* it is made, not just when a response is received. At INFO - level, only the beginning of a request or response body is logged. At DEBUG - level the complete body is logged. Also eliminates expensive decoding at - INFO level by logging the request body as a raw ``bytes`` literal. At DEBUG - level, the *decoded* (and complete) body is logged as a string literal.
- """ - - def perform_request(self, - method: str, - url: str, - params: Mapping[str, Any] | None = None, - body: bytes | None = None, - timeout: int | float | None = None, - ignore: Collection[int] = (), - headers: Mapping[str, str] | None = None - ) -> Tuple[int, Mapping[str, str], str]: - self._log_request(method, self._full_url(url, params), headers, body) - return super().perform_request(method, url, params, body, timeout, ignore, headers) - - def log_request_success(self, - method: str, - full_url: str, - path: str, - body: bytes | None, - status_code: int, - response: str, - duration: float - ) -> None: - self._log_response(logging.INFO, status_code, duration, full_url, method, response) - self._log_trace(method, path, body, status_code, response, duration) - - def log_request_fail(self, - method: str, - full_url: str, - path: str, - body: bytes | None, - duration: float, - status_code: int | None = None, - response: str | None = None, - exception: Exception | None = None - ) -> None: - self._log_response(logging.INFO if method == 'HEAD' and status_code == 404 else logging.WARN, - status_code, duration, full_url, method, response, exception) - self._log_trace(method, path, body, status_code, response, duration) - - # Duplicates functionality in the ``perform_request`` method of the base - # class so that our override of that method can log it speculatively. We - # also log the full URL *actually* used by the base class when the response - # is received, since it is only then that it is passed to our overrides of - # ``log_request_success`` and ``log_request_fail``. - - def _full_url(self, url: str, params: Mapping[str, Any] | None) -> str: - full_url = self.host + self.url_prefix + url - if params: - full_url = f'{full_url}?{urlencode(params)}' - return full_url - - def _log_request(self, method, full_url, headers, body): - es_log.info('Making %s request to %s', method, full_url) - es_log.debug('… with request headers %r', headers) - es_log.info(http_body_log_message('request', body)) - - def _log_response(self, - log_level: int, - status_code: int | None, - duration: float, - full_url: str, - method: str, - response: str | None, - exception=None - ) -> None: - status_code = 'no' if status_code is None else status_code - # Note that here we log the full URL actually used, see _full_url above - es_log.log(log_level, 'Got %s response after %.3fs from %s to %s', - status_code, duration, method, full_url, exc_info=exception) - es_log.log(log_level, http_body_log_message('response', response)) - - -class AWSAuthHttpClient(urllib3.request.RequestMethods): - """ - Decorates a urllib3 HTTPConnectionPool instance so that requests are - signed with AWS's Signature Version 4 flavor of HMAC. - """ - - def __init__(self, - pool: urllib3.HTTPConnectionPool, - http_auth: BotoAWSRequestsAuth): - super().__init__() - self._inner = pool - self._http_auth = http_auth - - def urlopen(self, # type: ignore[override] - method: str, - url: str, - body: bytes | None = None, - headers: Mapping[str, str] | None = None, - **kwargs - ) -> urllib3.HTTPResponse: - # self._http_auth is an instance of BotoAWSRequestsAuth, a subclass of - # AuthBase from the Requests library. To use that instance with urllib3 - # directly, we need to prepare a Requests request object, sign it with - # self._http_auth and pass the resulting signature header to urllib3's - # urlopen() method. 
- request = requests.PreparedRequest() - request.method = method - # Because urllib3 connection pools are host-specific, URLs passed to a - # connection pool's urlopen() must be relative and path-absolute. And - # while PreparedRequest.prepare() requires an absolute URL, we can sneak - # a relative one in by setting the attribute directly. This neatly - # avoids having to compose an absolute URL and the URL-encoding - # ambiguities that entails. The Elasticsearch client, for example, - # encodes colons in absolute paths even though the leading slash in such - # a path makes that unnecessary. These ambiguities could lead to an - # invalid signature. The AWS signature algorithm only looks at path and - # query of URLs. - assert url.startswith('/'), url - request.url = url - request.headers = headers - request.body = body - request = self._http_auth(request) - # Note that the various urlopen() implementations in urllib3 declare the - # `body` argument with a default value, making it a keyword argument, but - # the ES client passes it as a positional. If this were ever to change, - # this method would get a duplicate of the `body` argument as part of - # `kwargs`, resulting in a TypeError. - return self._inner.urlopen(method, url, body, headers=request.headers, **kwargs) - - def close(self): - self._inner.close() - - -class AzulUrllib3HttpConnection(AzulConnection, Urllib3HttpConnection): - - def __init__(self, - *args, - http_auth: BotoAWSRequestsAuth | None = None, - **kwargs - ) -> None: - super().__init__(*args, **kwargs) - if http_auth is not None: - # We can't extend the pool class because we don't control the - # instantiation. We therefore have to decorate the pool instance. - # Looking at the source of Urllib3HttpConnection we notice that only - # the methods `urlopen()` and `close()` are called. This means that - # the decorating class doesn't need to implement (or extend) a full - # HTTPConnectionPool, only the much slimmer RequestMethods. - client = AWSAuthHttpClient(self.pool, http_auth) - # We still need the cast because the stub declares `self.pool` to be - # an instance of HTTPConnectionPool. - self.pool = cast(urllib3.HTTPConnectionPool, client) - - -class ESClientFactory: - - @classmethod - def get(cls) -> OpenSearch: - host, port = aws.es_endpoint - return cls._create_client(host, port, config.es_timeout) - - @classmethod - @lru_cache(maxsize=32) - def _create_client(cls, host, port, timeout): - log.debug(f'Creating ES client [{host}:{port}]') - # Implicit retries don't make much sense in conjunction with optimistic - # locking (versioning). Consider a write request that times out in ELB - # with a 504 while the upstream ES node actually finishes the request. - # Retrying that individual write request will fail with a 409. Instead - # of retrying just the write request, the entire read-modify-write - # transaction needs to be retried. In order to be in full control of - # error handling, we disable the implicit retries via max_retries=0.
common_params = dict(hosts=[dict(host=host, port=port)], - timeout=timeout, - max_retries=0) - if host.endswith('.amazonaws.com'): - aws_auth = CachedBotoAWSRequestsAuth(aws_host=host, - aws_region=aws.region_name, - aws_service='es') - return OpenSearch(http_auth=aws_auth, - use_ssl=True, - verify_certs=True, - connection_class=AzulUrllib3HttpConnection, - **common_params) - else: - return OpenSearch(connection_class=AzulUrllib3HttpConnection, - **common_params) diff --git a/src/azul/exceptions.py b/src/azul/exceptions.py deleted file mode 100644 index 6cc6ab742f..0000000000 --- a/src/azul/exceptions.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import ( - Callable, - Type, - TypeVar, -) - -E = TypeVar('E', bound=BaseException) -R = TypeVar('R') - - -def catch(f: Callable[..., R], - exception_cls: Type[E], - /, - *args, - **kwargs - ) -> tuple[E, None] | tuple[None, R]: - """ - Invoke the given callable. If the callable raises an instance of the - specified exception class, return that exception, otherwise return the - result of the callable. - - :param f: The callable to invoke - - :param exception_cls: The class of exceptions to catch - - :param args: Positional arguments to the callable - - :param kwargs: Keyword arguments to the callable - - :return: Either a tuple of None and the return value of the callable or a - tuple of the exception raised by the callable and None - - >>> catch(int, Exception, '42') - (None, 42) - - >>> catch(int, Exception, '42', base=16) - (None, 66) - - >>> catch(int, ValueError, '') - (ValueError("invalid literal for int() with base 10: ''"), None) - - >>> catch(int, BaseException, '') - (ValueError("invalid literal for int() with base 10: ''"), None) - - >>> catch(int, NotImplementedError, '') - Traceback (most recent call last): - ... - ValueError: invalid literal for int() with base 10: '' - - >>> catch(int, ValueError, '', base=16) - (ValueError("invalid literal for int() with base 16: ''"), None) - """ - try: - return None, f(*args, **kwargs) - except exception_cls as e: - return e, None diff --git a/src/azul/files.py b/src/azul/files.py deleted file mode 100644 index 8a4bb92401..0000000000 --- a/src/azul/files.py +++ /dev/null @@ -1,57 +0,0 @@ -from contextlib import ( - contextmanager, -) -import hashlib -from os import ( - PathLike, -) -import os.path -import tempfile - - -@contextmanager -def write_file_atomically(path, mode=0o644): - dir_path, file_name = os.path.split(path) - fd, temp_path = tempfile.mkstemp(dir=dir_path) - try: - with os.fdopen(fd, 'w') as f: - yield f - os.chmod(temp_path, mode) - os.rename(temp_path, path) - except BaseException: - os.unlink(temp_path) - raise - - -def file_sha1(path: str | PathLike) -> str: - """ - >>> file_sha1('/dev/null') - 'da39a3ee5e6b4b0d3255bfef95601890afd80709' - - >>> from tempfile import NamedTemporaryFile - >>> with NamedTemporaryFile() as f: - ... f.write(b'f' * (1024 * 1024 - 1)) - ... file_sha1(f.name) - 1048575 - 'f5e766a4faaac674df1dfb707f6557b67bebe99b' - - >>> with NamedTemporaryFile() as f: - ... f.write(b'f' * 1024 * 1024) - ... file_sha1(f.name) - 1048576 - 'c08874b8aacb429a677f0ad660d64919e7d56734' - - >>> with NamedTemporaryFile() as f: - ... f.write(b'f' * (1024 * 1024 + 1)) - ...
file_sha1(f.name) - 1048577 - '6a8e89f614a497f5cf741a50d5c2f3c2e430db4e' - """ - with open(path, 'rb') as f: - sha1 = hashlib.sha1() - while True: - data = f.read(1024 * 1024) - if not data: - break - sha1.update(data) - return sha1.hexdigest() diff --git a/src/azul/health.py b/src/azul/health.py deleted file mode 100644 index 4a5f0031db..0000000000 --- a/src/azul/health.py +++ /dev/null @@ -1,529 +0,0 @@ -from collections.abc import ( - Iterable, - Mapping, -) -from itertools import ( - chain, -) -import json -import logging -import random -import time -from typing import ( - ClassVar, -) - -import attr -from botocore.exceptions import ( - ClientError, -) -import chalice -from chalice import ( - ChaliceViewError, - NotFoundError, - Response, -) -from furl import ( - furl, -) -import requests - -from azul import ( - CatalogName, - R, - cache, - cached_property, - config, - lru_cache, - require, -) -from azul.chalice import ( - AppController, - AzulChaliceApp, - LambdaMetric, -) -from azul.deployment import ( - aws, -) -from azul.es import ( - ESClientFactory, -) -from azul.openapi import ( - format_description, - params, - responses, - schema, -) -from azul.plugins import ( - MetadataPlugin, -) -from azul.service.storage_service import ( - StorageObjectNotFound, - StorageService, -) -from azul.types import ( - JSON, - MutableJSON, - json_bool, -) - -log = logging.getLogger(__name__) - - -# noinspection PyPep8Naming -class health_property(cached_property): - """ - Use this to decorate any methods you would like to be automatically - returned by HealthController.as_json(). Be sure to provide a docstring in - the decorated method. - """ - - def __get__(self, obj, objtype=None): - log.info('Getting health property %r', self.key) - return super().__get__(obj, objtype=objtype) - - @property - def key(self): - return self.fget.__name__ - - @property - def description(self): - return self.fget.__doc__ - - -@attr.s(frozen=True, kw_only=True, auto_attribs=True) -class HealthController(AppController): - lambda_name: str - - @cached_property - def storage_service(self): - return StorageService() - - @cache - def metadata_plugin(self, catalog: CatalogName) -> MetadataPlugin: - return MetadataPlugin.load(catalog).create() - - def basic_health(self) -> Response: - return self._make_response({'up': True}) - - def health(self) -> Response: - return self._make_response(self._health.as_json(Health.all_keys)) - - def custom_health(self, keys: str | None) -> Response: - if keys is None: - body = self._health.as_json(Health.all_keys) - elif isinstance(keys, str): - assert keys # Chalice maps empty string to None - try: - body = self._health.as_json(keys.split(',')) - except AssertionError as e: - if R.caused(e): - body = {'Message': 'Invalid health keys'} - else: - raise - else: - body = {'Message': 'Invalid health keys'} - return self._make_response(body) - - def fast_health(self) -> Response: - return self._make_response(self._health.as_json_fast()) - - def cached_health(self) -> JSON: - if self.app.catalog != config.default_catalog: - raise NotFoundError('Health is only cached for default catalog', - self.app.catalog, config.default_catalog) - else: - try: - cache = json.loads(self.storage_service.get(f'health/{self.lambda_name}')) - except StorageObjectNotFound: - raise NotFoundError('Cached health object does not exist') - else: - max_age = 2 * 60 - if time.time() - cache['time'] > max_age: - raise ChaliceViewError('Cached health object is stale') - else: - body = cache['health'] - return body - - def 
update_cache(self) -> None: - assert self.app.catalog == config.default_catalog - health_object = dict(time=time.time(), health=self._health.as_json_fast()) - self.storage_service.put(object_key=f'health/{self.lambda_name}', - data=json.dumps(health_object).encode()) - - @property - def _health(self): - # Don't cache. A Health instance is meant to be short-lived since it - # applies its own caching. If we cached the instance, we'd never observe - # any changes in health. - return Health(controller=self, catalog=self.app.catalog) - - def _make_response(self, body: JSON) -> Response: - try: - up = body['up'] - except KeyError: - status = 400 - else: - status = 200 if up else 503 - return Response(body=json.dumps(body), status_code=status) - - -@attr.s(frozen=True, kw_only=True, auto_attribs=True) -class Health: - """ - Encapsulates information about the health status of an Azul deployment. All - aspects of health are exposed as lazily loaded properties. Instantiating the - class does not examine any resources; only accessing the individual - properties, or calling the `as_json` method, does. - """ - controller: HealthController - catalog: str - _random: ClassVar[random.Random] = random.Random() - - @property - def lambda_name(self): - return self.controller.lambda_name - - def as_json(self, keys: Iterable[str]) -> JSON: - keys = frozenset(keys) - if keys: - require(keys <= self.all_keys) - else: - keys = self.all_keys - json = {k: getattr(self, k) for k in sorted(keys)} - json['up'] = all(v['up'] for v in json.values()) - return json - - @health_property - def other_lambdas(self) -> JSON: - """ - Indicates whether the companion REST API responds to HTTP requests. - """ - response = { - lambda_name: self._lambda(lambda_name) - for lambda_name in config.lambda_names() - if lambda_name != self.lambda_name - } - return { - 'up': all(json_bool(v['up']) for v in response.values()), - **response - } - - @health_property - def queues(self): - """ - Returns information about the SQS queues used by the indexer and the - mirror. - """ - sqs = aws.sqs_resource - response: MutableJSON = {'up': True} - for queue in config.all_queue_names: - try: - queue_instance = sqs.get_queue_by_name(QueueName=queue).attributes - except ClientError as ex: - response[queue] = { - 'up': False, - 'error': ex.response['Error']['Message'] - } - response['up'] = False - else: - response[queue] = { - 'up': True, - 'messages': { - 'delayed': int(queue_instance['ApproximateNumberOfMessagesDelayed']), - 'invisible': int(queue_instance['ApproximateNumberOfMessagesNotVisible']), - 'queued': int(queue_instance['ApproximateNumberOfMessages']) - } - } - return response - - @health_property - def progress(self) -> JSON: - """ - The number of Data Store bundles pending to be indexed and the number - of index documents in need of updating.
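- - An illustrative response shape (the values are hypothetical):: - - { - "up": true, - "unindexed_bundles": 2, - "unindexed_documents": 16 - }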
- """ - return { - 'up': True, - 'unindexed_bundles': sum(self.queues[config.notifications_queue.name].get('messages', {}).values()), - 'unindexed_documents': sum(chain.from_iterable( - self.queues[config.tallies_queue.derive(retry=retry).name].get('messages', {}).values() - for retry in (False, True) - )) - } - - def _api_endpoint(self, entity_type: str) -> JSON: - relative_url = furl(path=('index', entity_type), args={'size': '1'}) - url = str(config.service_endpoint.join(relative_url)) - log.info('Making HEAD request to %s', url) - start = time.time() - response = requests.head(url) - log.info('Got %s response after %.3fs from HEAD request to %s', - response.status_code, time.time() - start, url) - try: - response.raise_for_status() - except requests.exceptions.HTTPError as e: - return {'up': False, 'error': repr(e)} - else: - return {'up': True} - - @cached_property - def entity_types(self): - return self.controller.metadata_plugin(self.catalog).exposed_indices.keys() - - @health_property - def api_endpoints(self): - """ - Indicates whether important service API endpoints are operational. - """ - entity_type = self._random.choice(list(self.entity_types)) - return self._api_endpoint(entity_type) - - @health_property - def elasticsearch(self): - """ - Indicates whether the Elasticsearch cluster is responsive. - """ - return { - 'up': ESClientFactory.get().ping(), - } - - @lru_cache - def _lambda(self, lambda_name) -> JSON: - try: - url = config.lambda_endpoint(lambda_name).set(path='/health/basic', - args={'catalog': self.catalog}) - log.info('Requesting %r', url) - response = requests.get(str(url)) - response.raise_for_status() - up = response.json()['up'] - except Exception as e: - return { - 'up': False, - 'error': repr(e) - } - else: - return { - 'up': up, - } - - fast_properties: ClassVar[Mapping[str, Iterable[health_property]]] = { - 'indexer': ( - elasticsearch, - queues, - progress - ), - 'service': ( - elasticsearch, - api_endpoints, - ) - } - - def as_json_fast(self) -> JSON: - return self.as_json(p.key for p in self.fast_properties[self.lambda_name]) - - all_properties: ClassVar[Iterable[health_property]] = tuple( - p for p in locals().values() if isinstance(p, health_property) - ) - - all_keys: ClassVar[frozenset[str]] = frozenset(p.key for p in all_properties) - - -class HealthApp(AzulChaliceApp): - - @cached_property - def health_controller(self) -> HealthController: - return HealthController(app=self, lambda_name=self.unqualified_app_name) - - def default_routes(self): - _routes = super().default_routes() - _app_name = self.unqualified_app_name - - _up_key = { - 'up': format_description(''' - indicates the overall result of the health check - '''), - } - - _fast_keys = { - **{ - prop.key: format_description(prop.description) - for prop in Health.fast_properties[_app_name] - }, - **_up_key - } - - _all_keys = { - **{ - prop.key: format_description(prop.description) - for prop in Health.all_properties - }, - **_up_key - } - - def _health_spec(health_keys: dict) -> JSON: - return { - 'responses': { - f'{200 if up else 503}': { - 'description': format_description(f''' - {'The' if up else 'At least one of the'} checked resources - {'are' if up else 'is not'} healthy. - - The response consists of the following keys: - - ''') + ''.join(f'* `{k}` {v}' for k, v in health_keys.items()) + format_description(f''' - - The top-level `up` key of the response is - `{'true' if up else 'false'}`. 
- - ''') + (format_description(f''' - {'All' if up else 'At least one'} of the nested `up` keys - {'are `true`' if up else 'is `false`'}. - ''') if len(health_keys) > 1 else ''), - **responses.json_content( - schema.object( - additionalProperties=schema.object( - additionalProperties=True, - up=schema.enum(up) - ), - up=schema.enum(up) - ), - example={ - k: up if k == 'up' else {} for k in health_keys - } - ) - } for up in [True, False] - }, - 'tags': ['Auxiliary'] - } - - @self.route( - '/health', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Complete health check', - 'description': format_description(f''' - Health check of the {_app_name} REST API and all - resources it depends on. This may take a long time to complete - and exerts considerable load on the API. For that reason it - should not be requested frequently or by automated - monitoring facilities that would be better served by the - [`/health/fast`](#operations-Auxiliary-get_health_fast) or - [`/health/cached`](#operations-Auxiliary-get_health_cached) - endpoints. - '''), - **_health_spec(_all_keys) - } - ) - def health(): - return self.health_controller.health() - - @self.route( - '/health/basic', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Basic health check', - 'description': format_description(f''' - Health check of only the REST API itself, excluding other - resources that it depends on. A 200 response indicates that - the {_app_name} is reachable via HTTP(S) but nothing - more. - '''), - **_health_spec(_up_key) - } - ) - def basic_health(): - return self.health_controller.basic_health() - - @self.route( - '/health/cached', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Cached health check for continuous monitoring', - 'description': format_description(f''' - Return a cached copy of the - [`/health/fast`](#operations-Auxiliary-get_health_fast) - response. This endpoint is optimized for continuously - running, distributed health monitors such as Route 53 health - checks. The cache ensures that the {_app_name} is not - overloaded by these types of health monitors. The cache is - updated every minute. - '''), - **_health_spec(_fast_keys) - } - ) - def cached_health(): - return self.health_controller.cached_health() - - @self.route( - '/health/fast', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Fast health check', - 'description': format_description(''' - Performance-optimized health check of the REST API and other - critical resources that it depends on. This endpoint can be - requested more frequently than - [`/health`](#operations-Auxiliary-get_health) but - periodically scheduled, automated requests should be made to - [`/health/cached`](#operations-Auxiliary-get_health_cached). - '''), - **_health_spec(_fast_keys) - } - ) - def fast_health(): - return self.health_controller.fast_health() - - @self.route( - '/health/{keys}', - methods=['GET'], - cors=True, - spec={ - 'summary': 'Selective health check', - 'description': format_description(''' - This endpoint allows clients to request a health check on a - specific set of resources. Each resource is identified by a - *key*, the same key under which the resource appears in a - [`/health`](#operations-Auxiliary-get_health) response. - '''), - **_health_spec(_all_keys) - }, path_spec={ - 'parameters': [ - params.path( - 'keys', - form=schema.array(schema.enum(*sorted(Health.all_keys))), - description=''' - A comma-separated list of keys selecting the health - checks to be performed. Each key corresponds to an - entry in the response.
- ''') - ] - } - ) - def custom_health(keys: str | None = None): - return self.health_controller.custom_health(keys) - - @self.metric_alarm(metric=LambdaMetric.errors, - threshold=1, - period=24 * 60 * 60) - @self.metric_alarm(metric=LambdaMetric.throttles, - threshold=0, - period=5 * 60) - @self.retry(num_retries=0) - # FIXME: Remove redundant prefix from name - # https://github.com/DataBiosphere/azul/issues/5337 - @self.schedule( - 'rate(1 minute)', - name=self.unqualified_app_name + 'cachehealth' - ) - def update_health_cache(_event: chalice.app.CloudWatchEvent): - self.health_controller.update_cache() - - return { - **_routes, - **{k: v for k, v in locals().items() if not k.startswith('_')} - } diff --git a/src/azul/hmac.py b/src/azul/hmac.py deleted file mode 100644 index fa9c5e7cd6..0000000000 --- a/src/azul/hmac.py +++ /dev/null @@ -1,104 +0,0 @@ -import hashlib -import logging - -import chalice -from http_message_signatures import ( - HTTPMessageSigner, - HTTPMessageVerifier, - HTTPSignatureKeyResolver, -) -from http_message_signatures.algorithms import ( - HMAC_SHA256, -) -import http_sfv -from more_itertools import ( - one, -) -import requests -import requests.sessions -import requests.structures - -from azul import ( - cached_property, -) -from azul.auth import ( - HMACAuthentication, -) -from azul.deployment import ( - aws, -) - -log = logging.getLogger(__name__) - - -class SignatureHelper(HTTPSignatureKeyResolver): - """ - Client-side signing of HTTP requests and server-side checking of the - resulting signatures. On the client, requests are represented as instances - of requests.Request. On the server, chalice.Request is used. Internally - though, the latter is converted back to the former. - - This class should work as both a mix-in, and stand-alone. 
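- - A client-side usage sketch (hypothetical endpoint):: - - helper = SignatureHelper() - request = requests.Request('POST', 'https://service.example/api', data=b'{}') - response = helper.sign_and_send(request)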
- """ - - @cached_property - def verifier(self): - return HTTPMessageVerifier(signature_algorithm=HMAC_SHA256, - key_resolver=self) - - @cached_property - def signer(self): - return HTTPMessageSigner(signature_algorithm=HMAC_SHA256, - key_resolver=self) - - def auth_from_request(self, - request: chalice.app.Request - ) -> HMACAuthentication | None: - try: - request.headers['signature'] - except KeyError: - return None - else: - key_id = self.verify(request) - return HMACAuthentication(key_id) - - def resolve_public_key(self, key_id: str) -> bytes: - return self.resolve_private_key(key_id) - - def resolve_private_key(self, key_id: str) -> bytes: - key, actual_key_id = aws.get_hmac_key_and_id() - assert actual_key_id == key_id - return key - - def verify(self, current_request: chalice.app.Request) -> str: - try: - base_url = current_request.headers['host'] - path = current_request.context['path'] - endpoint = f'http://{base_url}{path}' - method = current_request.context['httpMethod'] - headers = current_request.headers - request = requests.Request(method, endpoint, headers, data=current_request.raw_body).prepare() - result = one(self.verifier.verify(request)) - except BaseException as e: - log.warning('Exception while validating HMAC: ', exc_info=e) - raise chalice.UnauthorizedError('Invalid authorization credentials') - else: - return result.parameters - - def sign_and_send(self, request: requests.Request) -> requests.Response: - request = request.prepare() - self.sign(request) - with requests.sessions.Session() as session: - response = session.send(request) - return response - - def sign(self, request: requests.PreparedRequest): - body = request.body - assert body is not None - digest = hashlib.sha256(body).digest() - assert isinstance(request.headers, requests.structures.CaseInsensitiveDict) - request.headers['Content-Digest'] = str(http_sfv.Dictionary({'sha-256': digest})) - key, key_id = aws.get_hmac_key_and_id() - self.signer.sign(request, - key_id=key_id, - covered_component_ids=('@method', '@path', 'content-digest')) diff --git a/src/azul/indexer/__init__.py b/src/azul/indexer/__init__.py deleted file mode 100644 index a6a60fc3a2..0000000000 --- a/src/azul/indexer/__init__.py +++ /dev/null @@ -1,692 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from functools import ( - total_ordering, -) -from itertools import ( - product, -) -import json -import logging -import math -from threading import ( - RLock, -) -from typing import ( - Any, - ClassVar, - Iterable, - Iterator, - Self, - cast, - final, -) - -import attrs - -from azul import ( - R, - config, -) -from azul.attrs import ( - SerializableAttrs, -) -from azul.json import ( - Parseable, -) -from azul.types import ( - AnyJSON, - JSON, - SupportsLessAndGreaterThan, - derived_type_params, -) -from azul.uuids import ( - UUIDPartition, - validate_uuid_prefix, -) - -log = logging.getLogger(__name__) - -BundleUUID = str -BundleVersion = str - - -# PyCharm can't handle mixing `attrs` with `total_ordering` and falsely claims -# that comparison operators besides `__lt__` are not defined. -# noinspection PyDataclass -@attrs.frozen(kw_only=True, eq=False) -@total_ordering -class BundleFQID(SerializableAttrs): - """ - A fully qualified bundle identifier. The attributes defined in this class - must always be sufficient to decide whether two instances of this class or - its subclasses identify the same bundle or not. 
Subclasses may define - additional attributes to help describe the bundle, but they are forbidden - from using these attributes in the implementations of their `__eq__` or - `__hash__` methods, either explicitly or in code generated by `attrs`. - """ - uuid: BundleUUID - version: BundleVersion - - def _nucleus(self) -> tuple[str, str]: - return self.uuid.lower(), self.version.lower() - - # We can't use attrs' generated implementation because it always - # considers operands with different types to be unequal, regardless of - # their inheritance relationships or how their attributes are annotated - # (e.g. specifying `eq=False` has no effect). We want instances of - # all subclasses to compare equal as long as `uuid` and `version` are - # equal. For the same reason, we can't use `typing.Self` in the signature - # because it would constrain the RHS to instances of subclasses of the LHS. - @final - def __eq__(self, other: Any) -> bool: - """ - >>> b1 = BundleFQID(uuid='a', version='b') - >>> b2 = BundleFQID(uuid='a', version='b') - >>> b1 == b2 - True - - >>> s1 = SourceRef(id='x', spec=SimpleSourceSpec.parse('y:/0')) - >>> sb1 = SourcedBundleFQID(uuid='a', version='b', source=s1) - >>> sb2 = SourcedBundleFQID(uuid='a', version='b', source=s1) - >>> sb1 == sb2 - True - - >>> b1 == sb1 - True - - >>> s2 = SourceRef(id='w', spec=SimpleSourceSpec.parse('z:/0')) - >>> sb3 = SourcedBundleFQID(uuid='a', version='b', source=s2) - >>> b1 == sb3 - True - - >>> sb1 == sb3 - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: (('a', 'b'), - SourceRef(id='x', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name='y')), - SourceRef(id='w', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name='z'))) - """ - if isinstance(other, BundleFQID): - same_bundle = self._nucleus() == other._nucleus() - if ( - same_bundle - and isinstance(self, SourcedBundleFQID) - and isinstance(other, SourcedBundleFQID) - ): - assert self.source == other.source, (self._nucleus(), self.source, other.source) - return same_bundle - else: - return False - - @final - def __hash__(self) -> int: - return hash(self._nucleus()) - - def __init_subclass__(cls): - """ - >>> @attrs.frozen(kw_only=True) - ... class FooBundleFQID(SourcedBundleFQID): - ... foo: str - Traceback (most recent call last): - ... - AssertionError: <class 'azul.indexer.FooBundleFQID'> - - >>> @attrs.frozen(kw_only=True, eq=False) - ... class FooBundleFQID(SourcedBundleFQID): - ... foo: str - """ - super().__init_subclass__() - assert cls.__eq__ is BundleFQID.__eq__, cls - assert cls.__hash__ is BundleFQID.__hash__, cls - - # attrs doesn't allow `order=True` when `eq=False` - def __lt__(self, other: 'BundleFQID') -> bool: - """ - >>> aa = BundleFQID(uuid='a', version='a') - >>> ab = BundleFQID(uuid='a', version='b') - >>> ba = BundleFQID(uuid='b', version='a') - >>> aa < ab < ba - True - - >>> ba > ab > aa - True - - >>> aa <= ab <= ba - True - - >>> ba >= ab >= aa - True - - >>> aa != ab != ba - True - """ - return self._nucleus() < other._nucleus() - - -@attrs.frozen(kw_only=True) -class Prefix: - common: str = '' - partition: int - of_everything: ClassVar['Prefix'] - - digits = '0123456789abcdef' - - def __attrs_post_init__(self): - validate_uuid_prefix(self.common) - assert ':' not in self.common, self.common - assert isinstance(self.partition, int), self.partition - # Version 4 UUIDs specify fixed bits in the third dash-separated - # group.
To ensure that any concatenation of common and - # partition_prefix is a valid UUID prefix, we restrict the number of - # characters from the concatenation to be within the first - # dash-separated group. - assert len(self.common) + self.partition <= 8, R( - 'Invalid common prefix and partition length', self) - - @classmethod - def parse(cls, prefix: str) -> Self: - """ - >>> Prefix.parse('aa/1') - Prefix(common='aa', partition=1) - - >>> p = Prefix.parse('a') - Traceback (most recent call last): - ... - ValueError: ('Missing partition prefix length', 'a') - - >>> Prefix.parse('aa/') - Traceback (most recent call last): - ... - AssertionError: R('Prefix source cannot end in a delimiter', 'aa/', '/') - - >>> Prefix.parse('8f538f53/1').partition_prefixes() # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R('Invalid common prefix and partition length', - Prefix(common='8f538f53', partition=1)) - - >>> list(Prefix.parse('8f538f53/0').partition_prefixes()) - ['8f538f53'] - - >>> Prefix.parse('aa/bb') - Traceback (most recent call last): - ... - ValueError: ('Partition prefix length must be an integer', 'bb') - - >>> Prefix.parse('') - Traceback (most recent call last): - ... - AssertionError: R('Cannot parse an empty prefix source') - """ - source_delimiter = '/' - assert prefix != '', R('Cannot parse an empty prefix source') - assert not prefix.endswith(source_delimiter), R( - 'Prefix source cannot end in a delimiter', prefix, source_delimiter) - partition: str | int - try: - entry, partition = prefix.split(source_delimiter) - except ValueError: - raise ValueError('Missing partition prefix length', prefix) - try: - partition = int(partition) - except ValueError: - raise ValueError('Partition prefix length must be an integer', partition) - validate_uuid_prefix(entry) - return cls(common=entry, partition=partition) - - @classmethod - def for_main_deployment(cls, num_subgraphs: int) -> Self: - """ - A prefix that is expected to rarely exceed 8192 subgraphs per partition - - >>> str(Prefix.for_main_deployment(0)) - Traceback (most recent call last): - ... - ValueError: math domain error - - >>> str(Prefix.for_main_deployment(1)) - '/0' - - >>> cases = [-1, 0, 1, 2] - - >>> n = 8192 - >>> [str(Prefix.for_main_deployment(n + i)) for i in cases] - ['/0', '/0', '/1', '/1'] - - Sources with this many bundles are very rare, so we have a generous - margin of error surrounding this cutoff point - - >>> n = 8192 * 16 - >>> [str(Prefix.for_main_deployment(n + i)) for i in cases] - ['/1', '/1', '/2', '/2'] - """ - partition = cls._prefix_length(num_subgraphs, 8192) - return cls(common='', partition=partition) - - @classmethod - def for_lesser_deployment(cls, num_subgraphs: int) -> Self: - """ - A prefix that yields an average of approximately 24 subgraphs per - source, using an experimentally derived heuristic formula designed to - minimize manual adjustment of the computed common prefixes. The - partition prefix length is always 1, even though some partitions may be - empty, to provide test coverage for handling multiple partitions. - - >>> str(Prefix.for_lesser_deployment(0)) - Traceback (most recent call last): - ...
- ValueError: math domain error - - >>> str(Prefix.for_lesser_deployment(1)) - '/1' - - >>> cases = [-1, 0, 1, 2] - - >>> n = 64 - >>> [str(Prefix.for_lesser_deployment(n + i)) for i in cases] - ['/1', '/1', '0/1', '1/1'] - - >>> n = 64 * 16 - >>> [str(Prefix.for_lesser_deployment(n + i)) for i in cases] - ['e/1', 'f/1', '00/1', '10/1'] - """ - digits = f'{num_subgraphs - 1:x}'[::-1] - length = cls._prefix_length(num_subgraphs, 64) - assert length < len(digits), num_subgraphs - return cls(common=digits[:length], partition=1) - - @classmethod - def _prefix_length(cls, n, m) -> int: - return max(0, math.ceil(math.log(n / m, len(cls.digits)))) - - def partition_prefixes(self) -> Iterator[str]: - """ - >>> list(Prefix.parse('/0').partition_prefixes()) - [''] - - >>> list(Prefix.parse('a/1').partition_prefixes()) - ['a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'aa', 'ab', 'ac', 'ad', 'ae', 'af'] - - >>> len(list(Prefix.parse('/2').partition_prefixes())) - 256 - """ - for partition_prefix_digits in product(self.digits, repeat=self.partition): - complete_prefix = ''.join((self.common, *partition_prefix_digits)) - validate_uuid_prefix(complete_prefix) - yield complete_prefix - - @property - def num_partitions(self) -> int: - """ - Equivalent to `len(self.partition_prefixes())`, but more efficient. - - >>> Prefix.parse('aa/0').num_partitions - 1 - >>> Prefix.parse('/3').num_partitions - 4096 - >>> Prefix.parse('aa/3').num_partitions - 4096 - """ - return len(self.digits) ** self.partition - - def __str__(self): - """ - >>> s = 'aa/1' - >>> s == str(Prefix.parse(s)) - True - """ - return f'{self.common}/{self.partition}' - - def __len__(self): - """ - >>> len(Prefix.parse('aa/0')) - 2 - >>> len(Prefix.parse('/3')) - 3 - >>> len(Prefix.parse('aa/3')) - 5 - """ - return len(self.common) + self.partition - - def __contains__(self, partition_prefix: str) -> bool: - """ - Same as `partition_prefix in prefix.partition_prefixes()` but more - efficient. See also :meth:`partition_prefixes`. - - >>> p0, p1, p2 = Prefix.parse('/0'), Prefix.parse('/1'), Prefix.parse('/2') - >>> 'a' in p0, 'a' in p1, 'a' in p2 - (False, True, False) - - >>> p1, p2, p3 = Prefix.parse('a/0'), Prefix.parse('a/1'), Prefix.parse('a/2') - >>> 'ab' in p1, 'ab' in p2, 'ab' in p3 - (False, True, False) - - >>> 'ab' in Prefix.parse('b/1') - False - - >>> 'ag' in Prefix.parse('a/1') - False - - >>> 'aB' in Prefix.parse('a/1') - False - """ - return ( - partition_prefix.startswith(self.common) - and len(partition_prefix) == len(self) - and all(c in self.digits for c in partition_prefix[len(self.common):]) - ) - - -Prefix.of_everything = Prefix.parse('/0') - - -@attrs.frozen(kw_only=True, order=True) -class SourceSpec(Parseable, metaclass=ABCMeta): - """ - The name of a repository source containing bundles to index. A repository - has at least one source. Repository plugins whose repository source names - are structured might want to implement this abstract class. Plugins that - have simple unstructured names may want to use :class:`SimpleSourceSpec`. - """ - - # FIXME: Improve equality and interning semantics for source ref and spec - # https://github.com/DataBiosphere/azul/issues/6778 - prefix: Prefix | None - - @classmethod - def parse_prefix_only(cls, spec: str) -> Prefix | None: - """ - Parse only the prefix component of a string representation of a - `SourceSpec`. To parse the entire spec, use :meth:`parse`. A return - value of `None` indicates that no prefix is configured for the spec.
- - >>> SourceSpec.parse_prefix_only('foo:/0') - Prefix(common='', partition=0) - - >>> SourceSpec.parse_prefix_only('foo:') is None - True - - >>> SourceSpec.parse_prefix_only('foo') - Traceback (most recent call last): - ... - AssertionError: R('Invalid source specification', 'foo') - """ - _, prefix = cls._parse(spec) - return prefix - - @classmethod - @abstractmethod - def parse(cls, spec: str) -> Self: - raise NotImplementedError - - @classmethod - def _parse(cls, spec: str) -> tuple[str, Prefix | None]: - rest, sep, prefix = spec.rpartition(':') - assert sep != '', R('Invalid source specification', spec) - prefix = Prefix.parse(prefix) if prefix else None - return rest, prefix - - @property - def _prefix_str(self) -> str: - return '' if self.prefix is None else str(self.prefix) - - @abstractmethod - def __str__(self) -> str: - raise NotImplementedError - - def eq_ignoring_prefix(self, other: Self) -> bool: - """ - >>> p = SimpleSourceSpec.parse - - >>> p('foo:4/0').eq_ignoring_prefix(p('foo:42/0')) - True - - >>> p('foo:4/0').eq_ignoring_prefix(p('bar:4/0')) - False - """ - return self == attrs.evolve(other, prefix=self.prefix) - - -@attrs.frozen(kw_only=True) -class SimpleSourceSpec(SourceSpec): - """ - Default implementation for unstructured source names. - """ - name: str - - @classmethod - def parse(cls, spec: str) -> Self: - """ - >>> SimpleSourceSpec.parse('https://foo.edu:12/0') # doctest: +NORMALIZE_WHITESPACE - SimpleSourceSpec(prefix=Prefix(common='12', - partition=0), - name='https://foo.edu') - - >>> SimpleSourceSpec.parse('foo') - Traceback (most recent call last): - ... - AssertionError: R('Invalid source specification', 'foo') - - >>> SimpleSourceSpec.parse('foo:8F53/0') - Traceback (most recent call last): - ... - azul.uuids.InvalidUUIDPrefixError: '8F53' is not a valid UUID prefix. - - >>> SimpleSourceSpec.parse('https:foo.edu/0') - Traceback (most recent call last): - ... - azul.uuids.InvalidUUIDPrefixError: 'foo.edu' is not a valid UUID prefix. - """ - name, prefix = cls._parse(spec) - self = cls(prefix=prefix, name=name) - assert spec == str(self), spec - return self - - def __str__(self) -> str: - """ - >>> s = 'foo:bar/baz:/0' - >>> s == str(SimpleSourceSpec.parse(s)) - True - - >>> s = 'foo:bar/baz:12/0' - >>> s == str(SimpleSourceSpec.parse(s)) - True - - >>> s = 'foo:bar/baz:12/2' - >>> s == str(SimpleSourceSpec.parse(s)) - True - """ - return f'{self.name}:{self._prefix_str}' - - -@attrs.frozen(kw_only=True, order=True) -class SourceRef[SOURCE_SPEC: SourceSpec](SerializableAttrs, - SupportsLessAndGreaterThan): - """ - A reference to a repository source containing bundles to index. A repository - has at least one source. A source is primarily referenced by its ID but we - drag the spec along to 1) avoid repeatedly looking it up and 2) ensure that - the mapping between the two doesn't change while we index a source. - - Instances of this class are interned: within a Python interpreter process, - there will only ever be one instance of this class for any given ID and - spec. There may be an instance of a subclass of this class that has the same - ID and spec as an instance of this class or another subclass of this class. - - FIXME: Improve equality and interning semantics for source ref and spec - https://github.com/DataBiosphere/azul/issues/6778 - - Note to plugin implementers: Since the source ID can't be assumed to be - globally unique, plugins should subclass this class, even if the subclass - body is empty. 
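-
-    A subclass for a hypothetical repository plugin can therefore be as
-    simple as (the class name below is made up)::
-
-        class MyPluginSourceRef(SourceRef[SimpleSourceSpec]):
-            pass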
- - >>> spec = SimpleSourceSpec(name='', prefix=(Prefix(partition=0))) - >>> list(sorted([ - ... SourceRef(id='d', spec=spec), - ... SourceRef(id='a', spec=spec), - ... ])) - ... # doctest: +NORMALIZE_WHITESPACE - [SourceRef(id='a', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name='')), - SourceRef(id='d', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name=''))] - - """ - id: str = attrs.field(order=str.lower) - spec: SOURCE_SPEC = attrs.field(order=False) - - _lookup: ClassVar[dict[tuple[type['SourceRef'], str, SourceSpec], 'SourceRef']] = {} - _lookup_lock = RLock() - - def __new__(cls, *, id: str, spec: SOURCE_SPEC) -> Self: - """ - Interns instances by their ID and spec. Two different sources may still - use the same ID or spec. - - FIXME: Improve equality and interning semantics for source ref and spec - https://github.com/DataBiosphere/azul/issues/6778 - - >>> class S(SourceRef): pass - >>> a, b = SimpleSourceSpec.parse('a:/0'), SimpleSourceSpec.parse('b:/0') - - >>> S(id='1', spec=a) is S(id='1', spec=a) - True - - >>> S(id='1', spec=a) is S(id='2', spec=a) - False - - >>> S(id='1', spec=b) # doctest: +NORMALIZE_WHITESPACE - S(id='1', spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=0), - name='b')) - - Interning is done per class: - - >>> class T(S): pass - >>> T(id='1', spec=a) is S(id='1', spec=a) - False - - >>> T(id='1', spec=a) == S(id='1', spec=a) - False - """ - with cls._lookup_lock: - lookup = cls._lookup - try: - self = lookup[cls, id, spec] - except KeyError: - self = super().__new__(cls) - cls.__init__(self, id=id, spec=spec) - lookup[cls, id, spec] = self - assert isinstance(self, cls) - assert self.id == id - assert self.spec == spec, (self.spec, spec) - return self - - @classmethod - def spec_cls(cls) -> type[SOURCE_SPEC]: - spec_cls = derived_type_params(cls, root=SourceRef)[SOURCE_SPEC] - assert isinstance(spec_cls, type) - assert issubclass(spec_cls, SourceSpec) - return cast(type[SOURCE_SPEC], spec_cls) - - def with_prefix(self, prefix: Prefix) -> Self: - return attrs.evolve(self, spec=attrs.evolve(self.spec, prefix=prefix)) - - -@attrs.frozen(kw_only=True, eq=False) -class SourcedBundleFQID[SOURCE_REF: SourceRef](BundleFQID): - """ - >>> spec = SimpleSourceSpec(name='', prefix=(Prefix(partition=0))) - >>> list(sorted([ - ... SourcedBundleFQID(uuid='d', version='e', source=SourceRef(id='1', spec=spec)), - ... SourcedBundleFQID(uuid='a', version='c', source=SourceRef(id='2', spec=spec)), - ... SourcedBundleFQID(uuid='a', version='b', source=SourceRef(id='3', spec=spec)), - ... ])) - ... 
# doctest: +NORMALIZE_WHITESPACE - [SourcedBundleFQID(uuid='a', version='b', - source=SourceRef(id='3', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name=''))), - SourcedBundleFQID(uuid='a', version='c', - source=SourceRef(id='2', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name=''))), - SourcedBundleFQID(uuid='d', version='e', - source=SourceRef(id='1', spec=SimpleSourceSpec(prefix=Prefix(common='', partition=0), name='')))] - """ - - source: SOURCE_REF - - @classmethod - def source_ref_cls(cls) -> type[SOURCE_REF]: - ref_cls = derived_type_params(cls, root=SourcedBundleFQID)[SOURCE_REF] - assert isinstance(ref_cls, type) - assert issubclass(ref_cls, SourceRef) - return cast(type[SOURCE_REF], ref_cls) - - -@attrs.define(kw_only=True) -class Bundle[BUNDLE_FQID: BundleFQID](SerializableAttrs, metaclass=ABCMeta): - fqid: BUNDLE_FQID - - @property - def uuid(self) -> BundleUUID: - return self.fqid.uuid - - @property - def version(self) -> BundleVersion: - return self.fqid.version - - @abstractmethod - def drs_uri(self, manifest_entry: JSON) -> str | None: - """ - Return the DRS URI to a data file in this bundle, or None if the data - file is not accessible via DRS. - - :param manifest_entry: the manifest entry of the data file. - """ - raise NotImplementedError - - def _reject_joiner(self, values: Iterable[AnyJSON]): - joiner = config.manifest_column_joiner - # We expect that skipping the check for circular references will provide - # a small performance benefit. The tradeoff is a risk of infinite - # recursion, which we consider unlikely enough to be acceptable. - encoder = json.JSONEncoder(check_circular=False) - for value in values: - for chunk in encoder.iterencode(value): - assert joiner not in chunk, R( - f'The string {joiner!r} is disallowed in metadata', self.fqid) - - @abstractmethod - def reject_joiner(self): - """ - Raise a requirement error if the manifest joiner occurs in this bundle - """ - raise NotImplementedError - - @classmethod - @abstractmethod - def canning_qualifier(cls) -> str: - """ - Short string prepended to the file extension to distinguish between - canned bundle formats originating from different plugins. - """ - raise NotImplementedError - - -class BundlePartition(UUIDPartition): - """ - A binary partitioning of the UUIDs of outer entities in a bundle. - """ - - #: 512 caused timeouts writing contributions, even in the retry Lambda - max_partition_size: ClassVar[int] = 256 - - def divisions(self, num_entities: int) -> int: - return math.ceil(num_entities / self.max_partition_size) - - def __attrs_post_init__(self): - super().__attrs_post_init__() - # Most bits in a v4 or v5 UUID are pseudo-random, including the leading - # 32 bits but those are followed by a couple of deterministic ones. - # For simplicity, we'll limit ourselves to 2 ** 32 leaf partitions. 
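-        # A prefix length of 8, for example, would yield 2 ** 8 == 256 leaf
-        # partitions.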
- assert self.prefix_length <= 32, R('Too many partitions', self.prefix_length) diff --git a/src/azul/indexer/action_controller.py b/src/azul/indexer/action_controller.py deleted file mode 100644 index e5d60c4d9c..0000000000 --- a/src/azul/indexer/action_controller.py +++ /dev/null @@ -1,88 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -import logging -import time -from typing import ( - Callable, - Iterable, -) - -import chalice -from chalice.app import ( - SQSRecord, -) - -from azul import ( - R, - cached_property, -) -from azul.chalice import ( - AppController, -) -from azul.queues import ( - Action, - SQSFifoMessage, - SQSMessage, -) -from azul.types import ( - JSON, - derived_type_params, - json_str, -) - -log = logging.getLogger(__name__) - - -class ActionController[A: Action](AppController, metaclass=ABCMeta): - - @property - @abstractmethod - def actions_are_fifo(self) -> bool: - raise NotImplementedError - - @cached_property - def _action_cls(self) -> type[A]: - action_cls = derived_type_params(type(self), root=ActionController)[A] - assert isinstance(action_cls, type), action_cls - return action_cls - - def _load_action(self, action_str: str) -> A: - action_cls = self._action_cls - try: - action = action_cls.from_json(action_str) - except AssertionError as e: - if R.caused(e): - raise R.propagate(e, chalice.BadRequestError) - else: - raise - else: - return action - - def _handle_events(self, - event: Iterable[SQSRecord], - message_handler: Callable[[A, JSON], None]): - for record in event: - message: SQSMessage - if self.actions_are_fifo: - message = SQSFifoMessage.from_record(record) - suffix, args = ', group ID %s', [message.group_id] - else: - message = SQSMessage.from_record(record) - suffix, args = '', [] - log.info('Worker handling message %r, ' + - 'attempt #%i (approx), message ID %s' + suffix, - message.body, message.attempts, message.id, *args) - start = time.time() - try: - action = self._load_action(json_str(message.body['action'])) - message_handler(action, message.body) - except BaseException: - # Note that another problematic outcome is for the Lambda invocation - # to time out, in which case this log message will not be written. - log.warning('Worker failed to handle message %r', message, exc_info=True) - raise - else: - duration = time.time() - start - log.info('Worker successfully handled message %r in %.3fs.', message, duration) diff --git a/src/azul/indexer/aggregate.py b/src/azul/indexer/aggregate.py deleted file mode 100644 index 5cea8ad4ed..0000000000 --- a/src/azul/indexer/aggregate.py +++ /dev/null @@ -1,635 +0,0 @@ -from __future__ import ( - annotations, -) - -from abc import ( - ABCMeta, - abstractmethod, -) -from collections import ( - Counter, - defaultdict, -) -import logging -import sys -from typing import ( - Any, - Callable, - Hashable, - TYPE_CHECKING, -) - -from azul import ( - R, -) -from azul.collections import ( - none_safe_key, -) -from azul.indexer.document import ( - EntityType, -) -from azul.json_freeze import ( - freeze, - thaw, -) -from azul.types import ( - AnyJSON, - JSON, - JSONs, - json_mapping, -) - -log = logging.getLogger(__name__) - -if TYPE_CHECKING: - from _typeshed import ( - SupportsAdd, - SupportsDunderGT, - SupportsDunderLT, - SupportsRichComparison, - ) - - -class Accumulator[V, A](metaclass=ABCMeta): - """ - Accumulates multiple values into a single value, not necessarily of the same - type. 
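-
-    For illustration, a minimal concrete subclass (a sketch, not part of the
-    production code) that simply collects every value into a list::
-
-        class ListAccumulator(Accumulator[int, list[int]]):
-
-            def __init__(self):
-                super().__init__()
-                self.values = []
-
-            def accumulate(self, value):
-                if isinstance(value, list):
-                    self.values.extend(value)
-                else:
-                    self.values.append(value)
-
-            def get(self):
-                return self.values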
- """ - - def __init__(self): - self.dropped = 0 - - @abstractmethod - def accumulate(self, value: V | list[V]) -> Any: - """ - Incorporate the given value into this accumulator. If the value is not - incorporated (due to e.g. a maximum size constraint), implementations - should increment :py:attr:`dropped`. - """ - raise NotImplementedError - - @abstractmethod - def get(self) -> A: - """ - Return the accumulated value. - """ - raise NotImplementedError - - -class BaseAccumulator[V, A](Accumulator[V, A], metaclass=ABCMeta): - """ - Handles lists of values by accumulating each item individually. For certain - types of accumulators this may not be the most efficient way to handle such - lists. - """ - - def accumulate(self, value: V | list[V]) -> None: - if isinstance(value, list): - for value in value: - self._accumulate(value) - else: - self._accumulate(value) - - @abstractmethod - def _accumulate(self, value: V) -> None: - raise NotImplementedError - - -class SumAccumulator[V:SupportsAdd](BaseAccumulator[V | None, V | None]): - """ - Add values. - - Unlike the built-in sum() function, this accumulator doesn't default to an - initial value of 0 but defaults to the first accumulated value instead. Also - unlike sum(), it simply ignores None values. - """ - - def __init__(self, *, initially: V | None = None) -> None: - """ - :param initially: the initial value for the sum. If None, the first - accumulated value that is not None will be used to - initialize the sum. Note that if this parameter is - None, the return value of get() could be None, too. - """ - super().__init__() - self.value = initially - - def _accumulate(self, value: V | None) -> None: - if value is not None: - if self.value is None: - self.value = value - else: - self.value += value - - def get(self) -> V | None: - return self.value - - -class SetAccumulator[V: Hashable](Accumulator[V, list[V]]): - """ - Accumulates values into a set, discarding duplicates and, optionally, values - that would grow the set past the maximum size. The accumulated value is - returned as a sorted list. The maximum size constraint does not take the - ordering into account. This accumulator does not return a list of the N - smallest values, it returns a sorted list of the first N distinct values. - """ - - def __init__(self, - max_size: int | None = None, - key: Callable[[V], SupportsRichComparison] | None = None - ) -> None: - """ - :param max_size: the maximum number of elements to retain - - :param key: The key to be used for sorting the accumulated set of - values. If this value is None, a default None-safe key will - be used. With that default key, if any None values were - placed in the accumulator, the first element, and only the - first element of the returned list will be None. - """ - super().__init__() - self.value: set[V] = set() - self.max_size = max_size - self.key = none_safe_key(none_last=True) if key is None else key - - def accumulate(self, value: V | list[V]) -> int: - """ - :return: The number of distinct values that were incorporated. There are - two reasons a value may not be incorporated: it was already in - the set or the accumulator is full. 
The latter is reflected in - self.dropped - - >>> acc = SetAccumulator(max_size=4) - >>> acc.accumulate([]), acc.get(), acc.dropped - (0, [], 0) - - >>> acc.accumulate(1), acc.get(), acc.dropped - (1, [1], 0) - - >>> acc.accumulate(1), acc.get(), acc.dropped - (0, [1], 0) - - >>> acc.accumulate(2), acc.get(), acc.dropped - (1, [1, 2], 0) - - >>> acc.accumulate([1, 2, 3]), acc.get(), acc.dropped - (1, [1, 2, 3], 0) - - >>> acc.accumulate([1, 2, 3]), acc.get(), acc.dropped - (0, [1, 2, 3], 0) - - >>> acc.accumulate([3, 4, 5]), acc.get(), acc.dropped - (1, [1, 2, 3, 4], 1) - - >>> acc.accumulate([5, 6]), acc.get(), acc.dropped - (0, [1, 2, 3, 4], 3) - - >>> acc.accumulate(1), acc.get(), acc.dropped - (0, [1, 2, 3, 4], 3) - - >>> acc.accumulate(5), acc.get(), acc.dropped - (0, [1, 2, 3, 4], 4) - - The ``dropped`` attribute is incremented for each of the 5's below since - that's what would happen were they incorporated in separate calls. - - >>> acc.accumulate([4, 4, 5, 5]), acc.get(), acc.dropped - (0, [1, 2, 3, 4], 6) - - >>> acc = SetAccumulator(max_size=0) - - >>> acc.accumulate([]), acc.get(), acc.dropped - (0, [], 0) - - >>> acc.accumulate(1), acc.get(), acc.dropped - (0, [], 1) - - >>> acc.accumulate([1, 1]), acc.get(), acc.dropped - (0, [], 3) - - >>> import random - >>> l = [random.randint(0, 9) for _ in range(10000)] - >>> acc = SetAccumulator() - >>> acc.accumulate(l) - 10 - - >>> list(set(acc.get())) == acc.get() - True - - >>> set(l) == set(acc.get()) - True - - Tuples are treated as scalars. We rely on this behavior when aggregating - `ValueAndUnit` fields. - - >>> acc = SetAccumulator(max_size=2) - >>> acc.accumulate((1, 2)), acc.get(), acc.dropped - (1, [(1, 2)], 0) - - >>> acc.accumulate([(2, 1), (1, 2), ()]), acc.get(), acc.dropped - (1, [(1, 2), (2, 1)], 1) - """ - current, max_size = self.value, self.max_size - initial_len = len(current) - free_space = sys.maxsize if max_size is None else max_size - initial_len - assert free_space >= 0 - if isinstance(value, list): - if len(value) <= free_space: - # If there is sufficient free space to incorporate all values, - # even if they're all the same, do so. - current.update(value) - else: - # If there are no duplicates in the argument, we can add as many - # items from the argument as we have free space for. - current.update(value[0:free_space]) - value = value[free_space:] - new_len = len(current) - # We may still have free space left if there were duplicate - # items in the slice we just incorporated, or if some of those - # items had already been incorporated before the slice was. - num_added = new_len - initial_len - free_space -= num_added - assert free_space >= 0 - # We could repeat the above but that could lead to many slices - # of length one. Instead we'll switch to handling elements - # individually until we run out of space. - i = iter(value) - try: - while free_space > 0: - current.add(next(i)) - if new_len != len(current): - new_len += 1 - free_space -= 1 - # We've run out of space. Report any elements not already - # accumulated as dropped. 
- while True: - if next(i) not in current: - self.dropped += 1 - except StopIteration: - pass - else: - if free_space > 0: - current.add(value) - elif value not in current: - self.dropped += 1 - final_len = len(current) - assert max_size is None or final_len <= max_size - return final_len - initial_len - - def get(self) -> list[V]: - return sorted(self.value, key=self.key) - - -class SetOfDictAccumulator(SetAccumulator[JSON | None]): - """ - A set accumulator that supports mutable mappings as values. - - >>> acc = SetOfDictAccumulator(key=lambda d: d['foo']) - >>> d = {'foo': 2} - >>> acc.accumulate(d) - 1 - - >>> acc.accumulate(d) - 0 - - >>> d = {'foo': 1, 'bar': 1} - >>> acc.accumulate(d) - 1 - - >>> acc.accumulate([d, d]) - 0 - - >>> acc.get() - [{'foo': 1, 'bar': 1}, {'foo': 2}] - """ - - def _freeze(self, value: JSON | None) -> JSON | None: - return None if value is None else json_mapping(freeze(value)) - - def _thaw(self, value: JSON | None) -> JSON | None: - return None if value is None else json_mapping(thaw(value)) - - def accumulate(self, value: JSON | None | list[JSON | None]) -> int: - if isinstance(value, list): - # `freeze` converts lists to tuples, which the superclass treats as - # scalars instead of sequences. Passing a list as a tuple would - # therefore introduce an extraneous level of nesting, as every - # element in `value` would end up in a single element of the - # accumulated result. - value = list(map(self._freeze, value)) - else: - value = self._freeze(value) - return super().accumulate(value) - - def get(self) -> list[JSON | None]: - return [self._thaw(value) for value in super().get()] - - -if TYPE_CHECKING: - # @formatter:off (PyCharm puts two blank lines around indented top-level - # classes, flake8 wants one) - class HashableAndSupportsDunderLT(SupportsDunderLT, - Hashable, - metaclass=ABCMeta): - ... - - class HashableAndSupportsDunderGT(SupportsDunderGT, - Hashable, - metaclass=ABCMeta): - ... - - type HashableAndSortable = ( - HashableAndSupportsDunderGT - | HashableAndSupportsDunderLT - ) - # @formatter:on - - -class DictAccumulator[K: HashableAndSortable, V](Accumulator[V, list[V]]): - """ - Accumulate values into a dictionary, allowing one unique value per key, - discarding values that would exceed the maximum number of dictionary keys. - In a way this is a generalized SetAccumulator. DictAccumulator can replace a - SetAccumulator by using the identity function for the key. - """ - - def __init__(self, - *, - max_size: int | None, - key: Callable[[V], K]): - """ - :param max_size: The maximum number of elements to retain. A value of - None can be used to specify no maximum. - - :param key: A function returning the key to be used both for storing the - accumulated value and sorting the accumulated set of values. - """ - super().__init__() - self.max_size = max_size - self.key = key - self.value: dict[K, V] = {} - - def accumulate(self, value): - """ - >>> acc = DictAccumulator(max_size=3, key=lambda s: s.lower()) - >>> acc.accumulate('foo') - >>> acc.get(), acc.dropped - (['foo'], 0) - - >>> acc.accumulate('foo') - >>> acc.get(), acc.dropped - (['foo'], 0) - - >>> acc.accumulate('Foo') - Traceback (most recent call last): - ... 
- AssertionError: R('Ambiguous key:', 'foo', 'values:', 'foo', 'Foo') - - >>> acc.accumulate('Bar') - >>> acc.accumulate('BAZ') - >>> acc.get(), acc.dropped - (['Bar', 'BAZ', 'foo'], 0) - - >>> acc.accumulate('spam') - >>> acc.get(), acc.dropped - (['Bar', 'BAZ', 'foo'], 1) - """ - key = self.key(value) - if self.max_size is None or len(self.value) < self.max_size: - try: - old_value = self.value[key] - except KeyError: - self.value[key] = value - else: - assert old_value == value, R( - 'Ambiguous key:', key, 'values:', old_value, value) - elif key not in self.value: - self.dropped += 1 - - def get(self): - return sorted(self.value.values(), key=self.key) - - -class FrequencySetAccumulator[V](Accumulator[V, list[V]]): - """ - An accumulator that accepts any number of values and returns a list - containing at most max_size of the most frequently occurring values. - - Note that the max_size argument only limits the length of the accumulated - result; the overall memory consumption of this accumulator is unbounded. - - >>> acc = FrequencySetAccumulator(max_size=2) - >>> acc.accumulate('x') - >>> acc.accumulate(['x','y']) - >>> acc.accumulate(['x','y','z']) - >>> acc.get() - ['x', 'y'] - - >>> acc = FrequencySetAccumulator(max_size=0) - >>> acc.accumulate('x') - >>> acc.get() - [] - """ - - def __init__(self, *, max_size: int) -> None: - super().__init__() - self.value: Counter[V] = Counter() - self.max_size = max_size - - def accumulate(self, value: V | list[V]) -> None: - if isinstance(value, (dict, list)): - self.value.update(value) - else: - self.value[value] += 1 - - def get(self) -> list[V]: - self.dropped = max(0, len(self.value) - self.max_size) - return [item for item, count in self.value.most_common(self.max_size)] - - -class LastValueAccumulator(Accumulator): - """ - An accumulator that accepts any number of values and returns the value most - recently seen. - """ - - def __init__(self) -> None: - super().__init__() - self.value = None - - def accumulate(self, value): - self.value = value - - def get(self): - return self.value - - -class SingleValueAccumulator(LastValueAccumulator): - """ - An accumulator that accepts any number of values given that they all are the - same value and returns a single value. Occurrence of any value that is - different than the first accumulated value raises a ValueError. - """ - - def accumulate(self, value): - if self.value is None: - super().accumulate(value) - elif self.value != value: - raise ValueError('Conflicting values:', self.value, value) - - -class MinAccumulator(LastValueAccumulator): - """ - An accumulator that returns the minimal value seen. - """ - - def accumulate(self, value): - if value is not None and (self.value is None or value < self.value): - super().accumulate(value) - - -class MaxAccumulator(LastValueAccumulator): - """ - An accumulator that returns the maximal value seen. - """ - - def accumulate(self, value): - if value is not None and (self.value is None or value > self.value): - super().accumulate(value) - - -class DistinctAccumulator[K:Hashable, V, A](BaseAccumulator[tuple[K, V], A]): - """ - An accumulator for (key, value) tuples. Of two pairs with the same key, only - the value from the first pair will be accumulated. The actual values will be - accumulated in another accumulator instance specified at construction. - - >>> acc = DistinctAccumulator(SumAccumulator(initially=0), max_size=3) - - Keys can be tuples, too. - - >>> acc.accumulate((('x', 'y'), 3)) - - Values associated with a recurring key will not be accumulated.
- - >>> acc.accumulate((('x', 'y'), 4)) - >>> acc.accumulate(('a', 20)) - >>> acc.accumulate(('b', 100)) - - Accumulation stops at max_size distinct keys. - - >>> acc.accumulate(('c', 1000)) - >>> acc.get() - 123 - """ - - def __init__(self, - inner: Accumulator[V, A], - max_size: int | None = None) -> None: - super().__init__() - self.inner = inner - self.keys: SetAccumulator[K] = SetAccumulator(max_size=max_size) - - def _accumulate(self, value: tuple[K, V]) -> None: - key, value = value - if self.keys.accumulate(key): - self.inner.accumulate(value) - - def get(self) -> A: - return self.inner.get() - - -class UniqueValueCountAccumulator[V:Hashable](Accumulator[V, int]): - """ - Count the number of unique values - """ - - def __init__(self): - self.inner: SetAccumulator[V] = SetAccumulator() - super().__init__() - - def accumulate(self, value: V | list[V]) -> Any: - self.inner.accumulate(value) - - def get(self) -> int: - return len(self.inner.get()) - - -class EntityAggregator(metaclass=ABCMeta): - - def __init__(self, outer_entity_type: EntityType, entity_type: EntityType): - self.outer_entity_type = outer_entity_type - self.entity_type = entity_type - - def _transform_entity(self, entity: JSON) -> JSON: - return entity - - def _accumulator(self, field: str) -> Accumulator | None: - """ - Return the Accumulator instance to be used for the given field or None - if the field should not be accumulated. - """ - return self._default_accumulator() - - def _default_accumulator(self) -> Accumulator | None: - return SetAccumulator(max_size=100) - - @abstractmethod - def aggregate(self, entities: JSONs) -> JSONs: - raise NotImplementedError - - -type JSONAccumulator = Accumulator[AnyJSON, AnyJSON] - -type Aggregate = dict[str, JSONAccumulator | None] - - -class SimpleAggregator(EntityAggregator): - - def aggregate(self, entities: JSONs) -> JSONs: - aggregate: Aggregate = {} - for entity in entities: - self._accumulate(aggregate, entity) - return [self._aggregate(aggregate)] if aggregate else [] - - def _accumulate(self, aggregate: Aggregate, entity: JSON) -> None: - entity = self._transform_entity(entity) - for field, value in entity.items(): - try: - accumulator = aggregate[field] - except KeyError: - accumulator = self._accumulator(field) - aggregate[field] = accumulator - if accumulator is not None: - accumulator.accumulate(value) - - def _aggregate(self, aggregate: Aggregate) -> JSON: - result = {} - for k, accumulator in aggregate.items(): - if accumulator is not None: - result[k] = accumulator.get() - if accumulator.dropped > 0: - log.warning('Values were dropped %d times while aggregating %s.%s into %s', - accumulator.dropped, self.entity_type, k, self.outer_entity_type) - return result - - -type GroupKeys = tuple[Hashable, ...] 
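-
-# A sketch of how a SimpleAggregator collapses a list of entities into a
-# single aggregate entity, using the default SetAccumulator for every field.
-# The entity type names and field values below are illustrative:
-#
-#   aggregator = SimpleAggregator(outer_entity_type='files',
-#                                 entity_type='donors')
-#   aggregator.aggregate([{'sex': 'male'}, {'sex': 'female'}])
-#   # -> [{'sex': ['female', 'male']}]
-#
-# A GroupingAggregator, defined below, instead partitions the entities by
-# the keys returned from _group_keys and emits one such aggregate per group.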
- - -class GroupingAggregator(SimpleAggregator): - - def aggregate(self, entities: JSONs) -> JSONs: - aggregates: dict[GroupKeys, Aggregate] = defaultdict(dict) - for entity in entities: - group_keys = self._group_keys(entity) - aggregate = aggregates[group_keys] - self._accumulate(aggregate, entity) - return [ - self._aggregate(aggregate) - for aggregate in aggregates.values() - ] - - @abstractmethod - def _group_keys(self, entity) -> GroupKeys: - raise NotImplementedError diff --git a/src/azul/indexer/document.py b/src/azul/indexer/document.py deleted file mode 100644 index 64fd6281c7..0000000000 --- a/src/azul/indexer/document.py +++ /dev/null @@ -1,1162 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from enum import ( - Enum, -) -import re -from typing import ( - ClassVar, - Self, - overload, -) - -import attr -from more_itertools import ( - one, -) - -from azul import ( - CatalogName, - R, - config, -) -from azul.enums import ( - auto, -) -from azul.indexer import ( - BundleFQID, - SimpleSourceSpec, - SourceRef, -) -from azul.indexer.field import ( - CataloguedFieldTypes, - FieldType, - FieldTypes, - null_str, - pass_thru_bool, - pass_thru_int, - pass_thru_json, - pass_thru_str, -) -from azul.json import ( - Parseable, -) -from azul.types import ( - AnyJSON, - AnyMutableJSON, - JSON, - MutableJSON, - json_int, - json_mapping, - json_sequence, - json_str, - optional, -) - -type EntityID = str -type EntityType = str - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class EntityReference(Parseable): - entity_type: EntityType - entity_id: EntityID - - def __str__(self) -> str: - return f'{self.entity_type}/{self.entity_id}' - - @classmethod - def parse(cls, s: str) -> Self: - entity_type, entity_id = s.split('/') - return cls(entity_type=entity_type, entity_id=entity_id) - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class CataloguedEntityReference(EntityReference): - catalog: CatalogName - - def __str__(self) -> str: - return f'{self.catalog}/{super().__str__()}' - - @classmethod - def for_entity(cls, catalog: CatalogName, entity: EntityReference): - return cls(catalog=catalog, - entity_type=entity.entity_type, - entity_id=entity.entity_id) - - -class DocumentType(Enum): - contribution = 'contribution' - aggregate = 'aggregate' - replica = 'replica' - - def __repr__(self) -> str: - return f'<{self.__class__.__name__}.{self._name_}>' - - -@attr.s(frozen=True, kw_only=True, auto_attribs=True) -class IndexName: - """ - The name of an Elasticsearch index used by an Azul deployment, parsed into - its components. The index naming scheme underwent a number of changes during - the evolution of Azul. The different naming schemes are captured in a - `version` component. Note that the first version of the index name syntax - did not carry an explicit version. The resulting ambiguity requires entity - types to not match the version regex below. - """ - #: Every index name starts with this prefix - prefix: ClassVar[str] = 'azul' - - #: The version of the index naming scheme - version: int - - #: The name of the deployment the index belongs to - deployment: str - - #: The catalog the index belongs to - catalog: CatalogName - - #: An additional qualifier to distinguish between indices of the same - #: `doc_type`. For indices containing contribution or aggregate documents, - #: for example, this is the name of the type of entity the documents contain - #: metadata about. 
qualifier: str - - #: Whether the documents in the index are contributions, aggregates, or - #: replicas - doc_type: DocumentType - - index_name_version_re: ClassVar[re.Pattern] = re.compile(r'v(\d+)') - - def __attrs_post_init__(self): - """ - >>> IndexName(version=2, - ... deployment='dev', - ... catalog='main', - ... qualifier='foo_bar', - ... doc_type=DocumentType.contribution) - ... # doctest: +NORMALIZE_WHITESPACE - IndexName(version=2, - deployment='dev', - catalog='main', - qualifier='foo_bar', - doc_type=<DocumentType.contribution>) - - >>> IndexName(version=1, - ... deployment='', - ... catalog='', - ... qualifier='', - ... doc_type=DocumentType.contribution) - Traceback (most recent call last): - ... - AssertionError: R('Version must be 2', 1) - - >>> IndexName(version=2, - ... deployment='dev', - ... catalog=None, # noqa - ... qualifier='foo', - ... doc_type=DocumentType.contribution) - Traceback (most recent call last): - ... - AssertionError: R('Catalog name is required', None) - - >>> IndexName(version=2, - ... deployment='_', - ... catalog='foo', - ... qualifier='bar', - ... doc_type=DocumentType.contribution) - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R("Deployment name '_' is too short, too long - or contains invalid characters.") - - >>> IndexName(version=2, - ... deployment='dev', - ... catalog='_', - ... qualifier='bar', - ... doc_type=DocumentType.contribution) - Traceback (most recent call last): - ... - AssertionError: R('Catalog name is invalid', '_') - - >>> IndexName(version=2, - ... deployment='dev', - ... catalog='foo', - ... qualifier='_', - ... doc_type=DocumentType.contribution) - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R("qualifier is either too short, too long - or contains invalid characters: '_'") - - >>> str(IndexName(version=2, - ... deployment='dev', - ... catalog='hca', - ... qualifier='foo', - ... doc_type=DocumentType.replica)) - Traceback (most recent call last): - ... - AssertionError: R('Unexpected replica qualifier', 'foo') - """ - config.validate_prefix(self.prefix) - assert self.version == 2, R('Version must be 2', self.version) - config.validate_deployment_name(self.deployment) - assert self.catalog is not None, R('Catalog name is required', self.catalog) - config.Catalog.validate_name(self.catalog) - config.validate_qualifier(self.qualifier) - if self.doc_type is DocumentType.replica: - # To shorten the string representation of replica index names, we - # expect the qualifier and document type to be the same string. - assert self.qualifier == self.doc_type.value, R( - 'Unexpected replica qualifier', self.qualifier) - assert '_' not in self.prefix, self.prefix - assert '_' not in self.deployment, self.deployment - assert self.catalog is None or '_' not in self.catalog, self.catalog - - def validate(self): - assert self.deployment == config.deployment_stage, R( - 'Index name does not use current deployment', - self, config.deployment_stage) - - @classmethod - def create(cls, - *, - catalog: CatalogName, - qualifier: str, - doc_type: DocumentType - ) -> Self: - return cls(version=2, - deployment=config.deployment_stage, - catalog=catalog, - qualifier=qualifier, - doc_type=doc_type) - - @classmethod - def parse(cls, index_name: str) -> Self: - """ - Parse the name of an index from any deployment and any version of Azul. - - >>> IndexName.parse('azul_dev') - Traceback (most recent call last): - ...
- AssertionError: R('Too few index name elements', ['azul', 'dev']) - - >>> IndexName.parse('azul_foo_dev') - Traceback (most recent call last): - ... - AssertionError: R('Version is required') - - >>> IndexName.parse('azl_v2_dev_main_foo') - Traceback (most recent call last): - ... - AssertionError: R('Unexpected prefix', 'azul', 'azl') - - >>> IndexName.parse('azul_v2_dev_main_foo') - ... # doctest: +NORMALIZE_WHITESPACE - IndexName(version=2, - deployment='dev', - catalog='main', - qualifier='foo', - doc_type=<DocumentType.contribution>) - - >>> IndexName.parse('azul_v2_dev_main_foo_aggregate') - ... # doctest: +NORMALIZE_WHITESPACE - IndexName(version=2, - deployment='dev', - catalog='main', - qualifier='foo', - doc_type=<DocumentType.aggregate>) - - >>> IndexName.parse('azul_v2_dev_main_foo_bar') - ... # doctest: +NORMALIZE_WHITESPACE - IndexName(version=2, - deployment='dev', - catalog='main', - qualifier='foo_bar', - doc_type=<DocumentType.contribution>) - - >>> IndexName.parse('azul_v2_dev_main_foo_bar_aggregate') - ... # doctest: +NORMALIZE_WHITESPACE - IndexName(version=2, - deployment='dev', - catalog='main', - qualifier='foo_bar', - doc_type=<DocumentType.aggregate>) - - >>> IndexName.parse('azul_v2_staging_hca_foo_bar_aggregate') - ... # doctest: +NORMALIZE_WHITESPACE - IndexName(version=2, - deployment='staging', - catalog='hca', - qualifier='foo_bar', - doc_type=<DocumentType.aggregate>) - - >>> IndexName.parse('azul_v2_dev_main_replica') - ... # doctest: +NORMALIZE_WHITESPACE - IndexName(version=2, - deployment='dev', - catalog='main', - qualifier='replica', - doc_type=<DocumentType.replica>) - - >>> IndexName.parse('azul_v2_staging__foo_bar__aggregate') - ... # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - AssertionError: R("qualifier ... 'foo_bar_'") - - >>> IndexName.parse('azul_v3_bla') - Traceback (most recent call last): - ... - AssertionError: R('Version must be 2', 3) - """ - index_name = index_name.split('_') - assert len(index_name) > 2, R('Too few index name elements', index_name) - prefix, *index_name = index_name - assert prefix == cls.prefix, R('Unexpected prefix', cls.prefix, prefix) - version = cls.index_name_version_re.fullmatch(index_name[0]) - assert version is not None, R('Version is required') - _, *index_name = index_name - version = int(version.group(1)) - assert version == 2, R('Version must be 2', version) - deployment, catalog, *index_name = index_name - if index_name[-1] == DocumentType.aggregate.value: - *index_name, _ = index_name - doc_type = DocumentType.aggregate - elif index_name == [DocumentType.replica.value]: - doc_type = DocumentType.replica - else: - doc_type = DocumentType.contribution - qualifier = '_'.join(index_name) - config.validate_qualifier(qualifier) - self = cls(version=version, - deployment=deployment, - catalog=catalog, - qualifier=qualifier, - doc_type=doc_type) - return self - - def __str__(self) -> str: - """ - >>> str(IndexName(version=2, - ... deployment='dev', - ... catalog='main', - ... qualifier='foo', - ... doc_type=DocumentType.contribution)) - 'azul_v2_dev_main_foo' - - >>> str(IndexName(version=2, - ... deployment='dev', - ... catalog='main', - ... qualifier='foo', - ... doc_type=DocumentType.aggregate)) - 'azul_v2_dev_main_foo_aggregate' - - >>> str(IndexName(version=2, - ... deployment='dev', - ... catalog='main', - ... qualifier='foo_bar', - ... doc_type=DocumentType.contribution)) - 'azul_v2_dev_main_foo_bar' - - >>> str(IndexName(version=2, - ... deployment='dev', - ... catalog='main', - ... qualifier='foo_bar', - ... doc_type=DocumentType.aggregate)) - 'azul_v2_dev_main_foo_bar_aggregate' - - >>> str(IndexName(version=2, - ...
deployment='staging', - ... catalog='hca', - ... qualifier='foo_bar', - ... doc_type=DocumentType.aggregate)) - 'azul_v2_staging_hca_foo_bar_aggregate' - - >>> str(IndexName(version=2, - ... deployment='dev', - ... catalog='hca', - ... qualifier='replica', - ... doc_type=DocumentType.replica)) - 'azul_v2_dev_hca_replica' - """ - if self.doc_type is DocumentType.aggregate: - doc_type = ['aggregate'] - elif self.doc_type is DocumentType.contribution: - doc_type = [] - elif self.doc_type is DocumentType.replica: - assert self.qualifier == self.doc_type.value - doc_type = [] - else: - assert False, self.doc_type - assert self.version == 2, self - assert self.catalog is not None, R('Catalog is required') - return '_'.join([ - self.prefix, - f'v{self.version}', - self.deployment, - self.catalog, - self.qualifier, - *doc_type, - ]) - - -type CataloguedDocumentCoordinates = DocumentCoordinates[CataloguedEntityReference] - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class DocumentCoordinates[E: EntityReference](metaclass=ABCMeta): - """ - The coordinates of a document ultimately define two strings: 1) the name of - the Elasticsearch index that contains the document and 2) the unique ID by - which it can be retrieved from that index. Both of these strings are - composed of smaller elements of information, e.g., a reference to the entity - the document contains metadata about and the type of the document. Concrete - subclasses typically add more such elements to be encoded in their index - names and document IDs. - """ - - doc_type: ClassVar[DocumentType] - - entity: E - - @property - def index_name(self) -> str: - """ - The fully qualified name of the Elasticsearch index for a document with - these coordinates. Only call this if these coordinates use a catalogued - entity reference. You can use `.with_catalog()` to create one. - """ - assert isinstance(self.entity, CataloguedEntityReference) - return str(IndexName.create(catalog=self.entity.catalog, - qualifier=self.index_qualifier, - doc_type=self.doc_type)) - - @property - def index_qualifier(self): - return self.entity.entity_type - - @property - @abstractmethod - def document_id(self) -> str: - raise NotImplementedError - - @classmethod - def from_hit(cls, hit: JSON) -> CataloguedDocumentCoordinates: - index_name = IndexName.parse(json_str(hit['_index'])) - index_name.validate() - document_id = json_str(hit['_id']) - subcls: type[CataloguedDocumentCoordinates] - if index_name.doc_type is DocumentType.contribution: - subcls = ContributionCoordinates - elif index_name.doc_type is DocumentType.aggregate: - subcls = AggregateCoordinates - elif index_name.doc_type is DocumentType.replica: - subcls = ReplicaCoordinates - else: - assert False, index_name.doc_type - assert issubclass(subcls, DocumentCoordinates) - return subcls._from_index(index_name, document_id) - - @classmethod - @abstractmethod - def _from_index(cls, - index_name: IndexName, - document_id: str - ) -> CataloguedDocumentCoordinates: - raise NotImplementedError - - def with_catalog(self: 'DocumentCoordinates', - catalog: CatalogName | None - ) -> CataloguedDocumentCoordinates: - """ - Return coordinates for the given catalog. Only works for instances that - have no catalog or ones having the same catalog in which case ``self`` - is returned.
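-
-        For example, with a hypothetical catalog name::
-
-            coordinates = coordinates.with_catalog('main')
-            index_name = coordinates.index_name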
- """ - if isinstance(self.entity, CataloguedEntityReference): - if catalog is not None: - assert self.entity.catalog == catalog, (self.entity.catalog, catalog) - return self - else: - assert catalog is not None - entity = CataloguedEntityReference.for_entity(catalog, self.entity) - return attr.evolve(self, entity=entity) - - -type CataloguedContributionCoordinates = ContributionCoordinates[CataloguedEntityReference] - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class ContributionCoordinates[E: EntityReference](DocumentCoordinates[E]): - """ - Coordinates of contribution documents. Contributions originate from a - subgraph ("bundle") and represent either the addition of metadata to an - entity or the removal of metadata from an entity. - - Contributions produced by transformers don't specify a catalog. The catalog - is supplied when the contributions are written to the index and it is - guaranteed to be the same for all contributions produced in response to one - notification. When contributions are read back during aggregation, they - specify a catalog, the catalog they were read from. Because of that duality - this class has to be generic in E, the type of EntityReference. - """ - - doc_type: ClassVar[DocumentType] = DocumentType.contribution - - bundle: BundleFQID - - deleted: bool - - @property - def document_id(self) -> str: - return '_'.join(( - self.entity.entity_id, - self.bundle.uuid, - self.bundle.version, - 'deleted' if self.deleted else 'exists' - )) - - @classmethod - def _from_index(cls, - index_name: IndexName, - document_id: str - ) -> CataloguedContributionCoordinates: - entity_type = index_name.qualifier - assert index_name.doc_type is DocumentType.contribution - deleted: str | bool - entity_id, bundle_uuid, bundle_version, deleted = document_id.split('_') - if deleted == 'deleted': - deleted = True - elif deleted == 'exists': - deleted = False - else: - assert False, deleted - entity = CataloguedEntityReference(catalog=index_name.catalog, - entity_type=entity_type, - entity_id=entity_id) - bundle = BundleFQID(uuid=bundle_uuid, version=bundle_version) - return ContributionCoordinates(entity=entity, bundle=bundle, deleted=deleted) - - def __str__(self) -> str: - return ' '.join(( - 'deletion of' if self.deleted else 'contribution to', - str(self.entity), - 'by bundle', self.bundle.uuid, 'at', self.bundle.version - )) - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class AggregateCoordinates(DocumentCoordinates[CataloguedEntityReference]): - """ - Coordinates of aggregate documents. Aggregate coordinates always carry a - catalog. - """ - - doc_type: ClassVar[DocumentType] = DocumentType.aggregate - - @classmethod - def _from_index(cls, index_name: IndexName, document_id: str) -> Self: - entity_type = index_name.qualifier - assert index_name.doc_type is DocumentType.aggregate - return cls(entity=CataloguedEntityReference(catalog=index_name.catalog, - entity_type=entity_type, - entity_id=document_id)) - - def __attrs_post_init__(self): - assert isinstance(self.entity, CataloguedEntityReference), type(self.entity) - - @property - def document_id(self) -> str: - return self.entity.entity_id - - def __str__(self) -> str: - return f'aggregate for {self.entity}' - - -type CataloguedReplicaCoordinates = ReplicaCoordinates[CataloguedEntityReference] - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True, slots=True) -class ReplicaCoordinates[E: EntityReference](DocumentCoordinates[E]): - """ - Coordinates of replica documents. 
Replicas are content-addressed, so these - coordinates depend not only on the entity reference, but on the contents of - the underlying metadata document. - """ - - doc_type: ClassVar[DocumentType] = DocumentType.replica - - #: A hash of the replica's JSON document - content_hash: str - - # Overrides the property in the base class. We need this to be statically - # accessible through the class. - index_qualifier: ClassVar[str] = 'replica' - - # The current v2 index name encoding depends on this - assert index_qualifier == doc_type.value - - @property - def document_id(self) -> str: - return '_'.join(( - self.entity.entity_type, - self.entity.entity_id, - self.content_hash - )) - - @classmethod - def _from_index(cls, - index_name: IndexName, - document_id: str - ) -> CataloguedReplicaCoordinates: - assert index_name.doc_type is DocumentType.replica, index_name - assert index_name.qualifier == cls.index_qualifier, index_name - # entity_type, the first component, may contain underscores - entity_type, entity_id, content_hash = document_id.rsplit('_', 2) - entity = CataloguedEntityReference(catalog=index_name.catalog, - entity_type=entity_type, - entity_id=entity_id) - return ReplicaCoordinates(content_hash=content_hash, entity=entity) - - def __str__(self) -> str: - return f'replica of {self.entity}' - - -FieldPathElement = str -FieldPath = tuple[FieldPathElement, ...] - -InternalVersion = tuple[int, int] - - -class OpType(Enum): - #: Write the document to the index, overwriting it if it already exists - index = auto() - - #: Write the document to the index or fail if it already exists - create = auto() - - #: Remove the document from the index or fail if it does not exist - delete = auto() - - #: Modify a document in the index via a scripted update or create it if it - #: does not exist - update = auto() - - -@attr.s(frozen=False, kw_only=True, auto_attribs=True) -class Document[C: DocumentCoordinates](metaclass=ABCMeta): - needs_translation: ClassVar[bool] = True - - coordinates: C - - version: InternalVersion | None - - # In the index, the `contents` property is always present and never null in - # documents. In instances of the Aggregate subclass, this attribute is None - # when they were created from documents that were retrieved from the - # index while intentionally excluding that property for efficiency. In - # instances of the Contribution subclass, this attribute is never None. - # - contents: JSON | None - - @property - def entity(self) -> EntityReference: - return self.coordinates.entity - - @property - @abstractmethod - def op_type(self) -> OpType: - """ - Get the ES client method to use when writing this document to the index. - """ - raise NotImplementedError - - @op_type.setter - @abstractmethod - def op_type(self, value: OpType): - """ - Set the ES client method to use when writing this document to the index. - This setter is optional, concrete classes may raise NotImplementedError - in their implementations and callers must gracefully handle that case. - """ - raise NotImplementedError - - @classmethod - def field_types(cls, field_types: FieldTypes) -> FieldTypes: - return { - 'entity_id': null_str, - 'contents': field_types - } - - @classmethod - @overload - def translate_fields(cls, - doc: JSON, - field_types: FieldType | FieldTypes, - *, - forward: bool, - allowed_paths: list[FieldPath] | None = None - ) -> MutableJSON: - ... 
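- - # The overload above covers top-level calls on a whole document, where - # traversal starts at the root; the overload below covers the recursive - # calls on nested values, which pass the current path explicitly.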
- - @classmethod - @overload - def translate_fields(cls, - doc: AnyJSON, - field_types: FieldType | FieldTypes, - *, - forward: bool, - allowed_paths: list[FieldPath] | None = None, - path: FieldPath - ) -> AnyMutableJSON: - ... - - @classmethod - def translate_fields(cls, - doc: AnyJSON, - field_types: FieldType | FieldTypes, - *, - forward: bool, - allowed_paths: list[FieldPath] | None = None, - path: FieldPath = () - ) -> AnyMutableJSON: - """ - Traverse a document to translate field values for insertion into - Elasticsearch, or to translate back response data. This is done to - support None/null values since Elasticsearch does not index these - values. Values that are empty lists ([]) and lists of None ([None]) are - both forward converted to [null_string]. - - :param doc: A document dict of values - - :param field_types: A mapping of field paths to field type - - :param forward: If True, substitute None values with their respective - Elasticsearch placeholder. - - :param allowed_paths: A list of field paths expected to be present in - the resulting document. If an unexpected field is - found, an AssertionError will be raised. - - :param path: Used internally during document traversal to capture the - current path into the document as a tuple of keys. - - :return: A copy of the original document with values translated - according to their type. - """ - if isinstance(field_types, dict): - if isinstance(doc, dict): - new_doc = {} - for key, val in doc.items(): - if key.endswith('_'): - # Shadow copy fields should only be present during a reverse - # translation; we skip over them here to remove them. - assert not forward, path - else: - try: - field_type = field_types[key] - except KeyError: - raise KeyError(f'Key {key!r} not defined in field_types') - except TypeError: - raise TypeError(f'Key {key!r} not defined in field_types') - new_doc[key] = cls.translate_fields(val, - field_type, - forward=forward, - allowed_paths=allowed_paths, - path=(*path, key)) - if forward and isinstance(field_type, FieldType) and field_type.shadowed: - # Add a non-translated shadow copy of this field's - # numeric value for sum aggregations - new_doc[key + '_'] = val - return new_doc - elif isinstance(doc, list): - return [ - cls.translate_fields(val, - field_types, - forward=forward, - allowed_paths=allowed_paths, - path=path) - for val in doc - ] - else: - assert False, (path, type(doc)) - else: - if isinstance(field_types, list): - # FIXME: Assert that a non-list field_type implies a non-list - # doc (only possible for contributions). - # https://github.com/DataBiosphere/azul/issues/2689 - assert isinstance(doc, list), (doc, path) - - field_types = one(field_types) - if isinstance(field_types, FieldType): - field_type = field_types - else: - assert False, (path, type(field_types)) - if allowed_paths is not None: - # An allowed path may be a prefix instead of a complete path, - # as is the case for `contents.files.related_files` - assert path in allowed_paths or path[:-1] in allowed_paths, (path, allowed_paths) - if forward: - if isinstance(doc, list): - if not doc and field_type.allow_sorting_by_empty_lists: - # Translate an empty list to a list containing a single - # None value (and then further translate that None value - # according to the field type) so ES doesn't discard it. - # That way, documents with fields that are empty lists - # are placed at the beginning (end) of an ascending - # (descending) sort. PassThrough fields like - # contents.metadata should not undergo this transformation.
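- # For example, with null_str an empty list [] first becomes [None] here - # and then ['~null'], since NullableString translates None to its - # placeholder '~null'.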
- doc = [None] - return [field_type.to_index(value) for value in doc] - else: - return field_type.to_index(doc) - else: - if isinstance(doc, list): - assert doc or not field_type.allow_sorting_by_empty_lists - return [field_type.from_index(value) for value in doc] - else: - return field_type.from_index(doc) - - def to_json(self) -> JSON: - assert self.contents is not None, self - return dict(entity_id=self.coordinates.entity.entity_id, - contents=self.contents) - - @classmethod - def from_json(cls, - *, - coordinates: C, - document: JSON, - version: InternalVersion | None, - **kwargs, - ) -> Self: - self = cls(coordinates=coordinates, - version=version, - contents=optional(json_mapping, document.get('contents')), - **kwargs) - assert document['entity_id'] == self.entity.entity_id - return self - - @classmethod - def mandatory_source_fields(cls) -> list[str]: - """ - A list of dot-separated field paths into the source of each document - that :meth:`from_json` expects to be present. Subclasses that override - that method should also override this one. - """ - return ['entity_id'] - - @classmethod - def from_index(cls, - field_types: CataloguedFieldTypes, - hit: JSON, - *, - coordinates: CataloguedDocumentCoordinates | None = None - ) -> Self: - if coordinates is None: - coordinates = DocumentCoordinates.from_hit(hit) - document = json_mapping(hit['_source']) - if cls.needs_translation: - document = cls.translate_fields(document, - field_types[coordinates.entity.catalog], - forward=False) - try: - version = json_int(hit['_seq_no']), json_int(hit['_primary_term']) - except KeyError: - assert '_seq_no' not in hit - assert '_primary_term' not in hit - version = None - - assert isinstance(coordinates, cls.coordinate_cls()) - - return cls.from_json(coordinates=coordinates, - document=json_mapping(document), - version=version) - - @classmethod - @abstractmethod - def coordinate_cls(cls) -> type[C]: - pass - - def to_index(self, - catalog: CatalogName | None, - field_types: CataloguedFieldTypes - ) -> JSON: - """ - Prepare a request to write this document to the index. The return value - is a dictionary with keyword arguments to the ES client method selected - by the :meth:`op_type` property. - - :param catalog: An optional catalog name. If None, this document's - coordinates must supply it. Otherwise this document's - coordinates must supply the same catalog or none at all. 
- - :param field_types: A mapping of field paths to field type - - :return: Request parameters for indexing - """ - coordinates = self.coordinates.with_catalog(catalog) - result: dict[str, AnyJSON] = { - 'index': coordinates.index_name, - 'id': self.coordinates.document_id - } - if self.op_type is not OpType.delete: - result['body'] = self._body(field_types[coordinates.entity.catalog]) - if self.version is not None: - result['if_seq_no'], result['if_primary_term'] = self.version - if self.op_type is OpType.update: - result['params'] = {'retry_on_conflict': 3} - return result - - def _body(self, field_types: FieldTypes) -> JSON: - body = self.to_json() - if self.needs_translation: - body = self.translate_fields(doc=body, - field_types=field_types, - forward=True) - return body - - -class DocumentSource(SourceRef[SimpleSourceSpec]): - pass - - -@attr.s(frozen=False, kw_only=True, auto_attribs=True) -class Contribution[E: EntityReference](Document[ContributionCoordinates[E]]): - - @classmethod - def coordinate_cls(cls) -> type[ContributionCoordinates[E]]: - return ContributionCoordinates - - # This narrows the type declared in the superclass. See comment there. - contents: JSON - source: DocumentSource - - #: The op_type attribute will change to OpType.index if writing - #: to Elasticsearch fails with 409 - _op_type: OpType = OpType.create - - @property - def op_type(self) -> OpType: - return self._op_type - - @op_type.setter - def op_type(self, op_type: OpType): - self._op_type = op_type - - def __attrs_post_init__(self): - assert self.contents is not None - assert isinstance(self.coordinates, ContributionCoordinates) - assert self.coordinates.doc_type is DocumentType.contribution - - @classmethod - def field_types(cls, field_types: FieldTypes) -> FieldTypes: - return { - **super().field_types(field_types), - 'document_id': null_str, - 'source': pass_thru_json, - # These pass-through fields will never be None - 'bundle_uuid': pass_thru_str, - 'bundle_version': pass_thru_str, - 'bundle_deleted': pass_thru_bool - } - - @classmethod - def from_json(cls, - *, - coordinates: ContributionCoordinates[E], - document: JSON, - version: InternalVersion | None, - **kwargs - ) -> Self: - self = super().from_json(coordinates=coordinates, - document=document, - version=version, - source=DocumentSource.from_json(document['source']), - **kwargs) - assert self.coordinates.document_id == document['document_id'] - assert self.coordinates.bundle.uuid == document['bundle_uuid'] - assert self.coordinates.bundle.version == document['bundle_version'] - assert self.coordinates.deleted == document['bundle_deleted'] - return self - - @classmethod - def mandatory_source_fields(cls) -> list[str]: - return super().mandatory_source_fields() + [ - 'contents', - 'document_id', - 'source', - 'bundle_uuid', - 'bundle_version', - 'bundle_deleted' - ] - - def to_json(self): - return dict(super().to_json(), - document_id=self.coordinates.document_id, - source=self.source.to_json(), - bundle_uuid=self.coordinates.bundle.uuid, - bundle_version=self.coordinates.bundle.version, - bundle_deleted=self.coordinates.deleted) - - -@attr.s(frozen=False, kw_only=True, auto_attribs=True) -class Aggregate(Document[AggregateCoordinates]): - sources: set[DocumentSource] - bundles: list[BundleFQID] | None - num_contributions: int - - def __attrs_post_init__(self): - assert isinstance(self.coordinates, AggregateCoordinates) - assert self.coordinates.doc_type is DocumentType.aggregate - - @classmethod - def coordinate_cls(cls) -> 
type[AggregateCoordinates]: - return AggregateCoordinates - - @classmethod - def field_types(cls, field_types: FieldTypes) -> FieldTypes: - return { - **super().field_types(field_types), - 'num_contributions': pass_thru_int, - 'sources': { - 'id': pass_thru_str, - 'spec': pass_thru_str - }, - 'bundles': { - 'uuid': pass_thru_str, - 'version': pass_thru_str, - } - } - - @classmethod - def from_json(cls, - *, - coordinates: AggregateCoordinates, - document: JSON, - version: InternalVersion | None, - **kwargs - ) -> Self: - sources = set(map(DocumentSource.from_json, json_sequence(document['sources']))) - bundles = optional(json_sequence, document.get('bundles')) - bundles = None if bundles is None else list(map(BundleFQID.from_json, bundles)) - num_contributions = json_int(document['num_contributions']) - self = super().from_json(coordinates=coordinates, - document=document, - version=version, - num_contributions=num_contributions, - sources=sources, - bundles=bundles) - assert isinstance(self, Aggregate) - return self - - @classmethod - def mandatory_source_fields(cls) -> list[str]: - return super().mandatory_source_fields() + [ - 'num_contributions', - 'sources.id', - 'sources.spec' - ] - - def to_json(self) -> JSON: - sources = [source.to_json() for source in self.sources] - if self.bundles is None: - bundles = None - else: - bundles = [bundle.to_json() for bundle in self.bundles] - return dict(super().to_json(), - num_contributions=self.num_contributions, - sources=sources, - bundles=bundles) - - @property - def op_type(self) -> OpType: - if self.contents: - return OpType.create if self.version is None else OpType.index - else: - # Aggregates are deleted when their contents goes blank - return OpType.delete - - @op_type.setter - def op_type(self, value: OpType): - raise NotImplementedError - - -@attr.s(frozen=False, kw_only=True, auto_attribs=True) -class Replica[E: EntityReference](Document[ReplicaCoordinates[E]]): - """ - A verbatim copy of a metadata document - """ - - #: The type of replica, i.e., what sort of metadata document from the - #: underlying data repository we are storing a copy of. In practice, this is - #: the same as `self.coordinates.entity.entity_type`, but this isn't - #: necessarily the case. - #: - #: Typically, all replicas of the same type have similar shapes, just like - #: contributions for entities of the same type. However, mixing replicas of - #: different types results in an index containing documents of heterogeneous - #: shapes. Document heterogeneity is a problem for ES, but we deal with it - #: by disabling the ES index mapping, essentially turning off the reverse - #: index that ES normally builds from these documents and using the index - #: only to store and retrieve the documents by their coordinates. 
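- #: - #: As an illustration, replicas with the made-up types 'file' and - #: 'biosample' could coexist in the one replica index even though their - #: documents have entirely different shapes.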
- replica_type: str - - contents: JSON - - source: DocumentSource - - hub_ids: list[EntityID] - - needs_translation: ClassVar[bool] = False - - def __attrs_post_init__(self): - assert isinstance(self.coordinates, ReplicaCoordinates) - assert self.coordinates.doc_type is DocumentType.replica - - @classmethod - def coordinate_cls(cls) -> type[ReplicaCoordinates]: - return ReplicaCoordinates - - @classmethod - def field_types(cls, field_types: FieldTypes) -> FieldTypes: - # Replicas do not undergo translation - raise NotImplementedError - - def to_json(self) -> JSON: - return dict(super().to_json(), - source=self.source.to_json(), - replica_type=self.replica_type, - # Ensure that index contents is deterministic for unit tests - hub_ids=sorted(set(self.hub_ids))) - - @property - def op_type(self) -> OpType: - return OpType.update - - @op_type.setter - def op_type(self, value: OpType): - raise NotImplementedError - - def _body(self, field_types: FieldTypes) -> JSON: - return { - 'script': { - 'source': ''' - Stream stream = Stream.concat(ctx._source.hub_ids.stream(), - params.hub_ids.stream()); - ctx._source.hub_ids = stream.sorted().distinct().collect(Collectors.toList()); - ''', - 'params': { - 'hub_ids': self.hub_ids - } - }, - 'upsert': super()._body(field_types) - } - - -CataloguedContribution = Contribution[CataloguedEntityReference] diff --git a/src/azul/indexer/document_service.py b/src/azul/indexer/document_service.py deleted file mode 100644 index 27756e4421..0000000000 --- a/src/azul/indexer/document_service.py +++ /dev/null @@ -1,134 +0,0 @@ -from collections.abc import ( - Iterable, -) -from typing import ( - Type, -) - -from more_itertools import ( - one, -) - -from azul import ( - CatalogName, - cache, - config, -) -from azul.collections import ( - deep_dict_merge, -) -from azul.indexer.document import ( - Aggregate, - Contribution, - Document, -) -from azul.indexer.field import ( - CataloguedFieldTypes, - FieldType, - FieldTypes, - Nested, -) -from azul.indexer.transform import ( - Transformer, -) -from azul.plugins import ( - FieldPath, - MetadataPlugin, -) -from azul.types import ( - AnyJSON, - AnyMutableJSON, -) - - -class DocumentService: - - @cache - def metadata_plugin(self, catalog: CatalogName) -> MetadataPlugin: - return MetadataPlugin.load(catalog).create() - - @cache - def aggregate_class(self, catalog: CatalogName) -> Type[Aggregate]: - return self.metadata_plugin(catalog).aggregate_class() - - @property - def always_limit_access(self) -> bool: - """ - True if access restrictions are enforced unconditionally. False, if the - filter stage is allowed to weaken them, e.g., based on the entity type. - """ - return True - - def transformer_types(self, - catalog: CatalogName - ) -> Iterable[Type[Transformer]]: - return self.metadata_plugin(catalog).transformer_types() - - @cache - def entity_types(self, catalog: CatalogName) -> list[str]: - return [ - transformer_cls.entity_type() - for transformer_cls in self.transformer_types(catalog) - ] - - @cache - def field_type(self, catalog: CatalogName, path: FieldPath) -> FieldType: - """ - Get the type of the field at the given document path. - - :param catalog: The catalog to operate on. Different catalogs may use - different field types. - - :param path: A tuple of keys to traverse document. 
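- - For example, with illustrative names that are not from a real catalog - or schema, ``service.field_type('dev', ('contents', 'files', 'size'))`` - would return the ``FieldType`` instance registered for the ``size`` - field, e.g., an instance of ``NullableNumber``.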
- """ - field_types = self.field_types(catalog) - for element in path: - try: - field_types = field_types[element] - except (KeyError, TypeError) as e: - if isinstance(field_types, list): - field_types = one(field_types) - if isinstance(field_types, Nested) and element == field_types.agg_property: - field_types = field_types.properties[element] - else: - raise type(e)('Path not represented in field_types', path) - if isinstance(field_types, list): - field_types = one(field_types) - return field_types - - def field_types(self, catalog: CatalogName) -> FieldTypes: - """ - Returns a mapping of fields to field types - - :return: dict with nested keys matching Elasticsearch fields and values - with the field's type - """ - field_types = deep_dict_merge.from_iterable( - transformer_cls.field_types() - for transformer_cls in self.transformer_types(catalog) - ) - aggregate_cls = self.aggregate_class(catalog) - return deep_dict_merge( - Contribution.field_types(field_types), - aggregate_cls.field_types(field_types) - # Replicas are intentionally omitted here because their contents - # does not undergo translation - ) - - def catalogued_field_types(self) -> CataloguedFieldTypes: - return { - catalog: self.field_types(catalog) - for catalog in config.catalogs - } - - def translate_fields(self, - catalog: CatalogName, - doc: AnyJSON, - *, - forward: bool, - allowed_paths: list[FieldPath] | None = None - ) -> AnyMutableJSON: - return Document.translate_fields(doc, - self.field_types(catalog), - forward=forward, - allowed_paths=allowed_paths) diff --git a/src/azul/indexer/field.py b/src/azul/indexer/field.py deleted file mode 100644 index 33eeae8a0f..0000000000 --- a/src/azul/indexer/field.py +++ /dev/null @@ -1,504 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from datetime import ( - datetime, - timezone, -) -import sys -from types import ( - UnionType, -) -from typing import ( - ClassVar, - Final, - Iterable, - Mapping, - Sequence, - TypeAliasType, - TypedDict, - cast, -) - -from more_itertools import ( - first, - one, -) - -from azul import ( - CatalogName, - cached_property, -) -from azul.openapi import ( - schema, -) -from azul.time import ( - format_dcp2_datetime, - parse_dcp2_datetime, -) -from azul.types import ( - AnyJSON, - JSON, - PrimitiveJSON, - reify, -) - -# A type variable named ``N`` denotes the native type of a field in documents as -# they are being created by a transformer or processed by an aggregator. -# -# A type variable named ``X`` denotes the type of a field in a document just -# before it's being written to the index. Think "index type". - -#: The static (build time) type of a document field value -#: -type Form[T] = type[T] | TypeAliasType | UnionType - -#: The upper bound on the type of field values stored in the index: -#: -#: Note that while ``IndexRange`` *is* assignable to JSON, ``mypy`` doesn't -#: realize that hence the need for the union in the definition. -#: -type IndexForm = AnyJSON | IndexRange - - -#: The Elasticsearch index representation of ranges along with a factory -#: -class IndexRange[X: IndexForm](TypedDict): - gte: X - lte: X - - -def index_range[X: IndexForm](gte: X, lte: X) -> IndexRange[X]: - return dict(gte=gte, lte=lte) - - -#: The native and API representations of ranges -#: -type Range[E] = tuple[E, E] -type ApiRange = Range[AnyJSON] | list[AnyJSON] - -#: While Elasticsearch distinguishes between integers and floating point numbers -#: in its index, JSON does not. 
Since all payloads to and from Elasticsearch are -#: serialized as JSON we have to be prepared to get 1 back when we write 1.0. -#: -type JSONNumber = int | float - - -class FieldType[N, X: IndexForm](metaclass=ABCMeta): - shadowed: ClassVar[bool] = False - es_sort_mode: ClassVar[str] = 'min' - allow_sorting_by_empty_lists: ClassVar[bool] = True - - def __init__(self, native_form: Form[N], index_form: Form[X]): - self.native_form: Final[Form[N]] = native_form - self.index_form: Final[Form[X]] = index_form - - @cached_property - def native_types(self) -> tuple[type, ...]: - """ - The possible runtime (reified) types of the value of document fields - of this type. - """ - return reify(self.native_form) - - @cached_property - def index_types(self) -> tuple[type, ...]: - return reify(self.index_form) - - @property - @abstractmethod - def es_type(self) -> str | None: - raise NotImplementedError - - @abstractmethod - def to_index(self, value: N) -> X: - raise NotImplementedError - - @abstractmethod - def from_index(self, value: X) -> N: - raise NotImplementedError - - def to_tsv(self, value: N) -> str: - return '' if value is None else str(value) - - @property - def api_schema(self) -> JSON: - """ - The JSONSchema describing fields of this type in OpenAPI specifications. - """ - return schema.coalesce(self.native_types) - - def from_api(self, value: AnyJSON) -> N: - """ - Convert a deserialized JSON value occurring as an input to a REST API - to the native representation of values of this field type. - - The default implementation assumes that the REST API representation - of the value is already of the native type, and just returns the - argument. Subclasses must override this if the native and API - representations differ. An API representation of a field only occurs - in inputs to a REST API. Outputs like the body of a response use the - native representation. - """ - assert isinstance(value, self.native_types), (value, self) - return cast(N, value) - - @property - def supported_filter_relations(self) -> tuple[str, ...]: - """ - The filter relations in which fields of this type can be used as a - left-hand side. By default, this class only supports equality. A scalar - field type would override this method to include the `within` relation. - """ - return 'is', - - def api_filter_schema(self, relation: str) -> JSON: - """ - The JSONSchema describing the right-hand side operand of the given filter - relation in OpenAPI specifications when the left-hand side operand is a - field of this type.
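- - For example, for the 'within' relation the schema is a two-element - array of the field's own schema (see ``_api_range_schema``), so a value - like ``[1000, 2000]`` (illustrative) stands for the range from - ``gte=1000`` to ``lte=2000``.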
- """ - assert relation in self.supported_filter_relations, relation - if relation == 'is': - return self.api_schema - elif relation == 'within': - return self._api_range_schema(self.api_schema) - else: - assert False, relation - - def _api_range_schema(self, api_schema: JSON) -> JSON: - return schema.array(api_schema, minItems=2, maxItems=2) - - def _range_to_index(self, value: Range[N]) -> IndexRange[X]: - gte, lte = value - return index_range(self.to_index(gte), self.to_index(lte)) - - def _from_api_range(self, value: AnyJSON) -> Range[N]: - assert isinstance(value, (list, tuple)) and len(value) == 2, value - gte, lte = value - return self.from_api(gte), self.from_api(lte) - - def filter(self, - relation: str, - values: Iterable[AnyJSON | ApiRange] - ) -> Iterable[X | IndexRange[X]]: - if relation == 'within': - return list(map(self._range_to_index, map(self._from_api_range, values))) - else: - return list(map(self.to_index, map(self.from_api, values))) - - -class PassThrough[T: AnyJSON](FieldType[T, T]): - allow_sorting_by_empty_lists = False - - def __init__(self, type: Form[T], *, es_type: str | None): - super().__init__(type, type) - self._es_type = es_type - - @property - def es_type(self) -> str | None: - return self._es_type - - def to_index(self, value: T) -> T: - return value - - def from_index(self, value: T) -> T: - return value - - -# FIXME: change the es_type for JSON to `nested` -# https://github.com/DataBiosphere/azul/issues/2621 -pass_thru_json: PassThrough[JSON] = PassThrough(JSON, es_type=None) - - -class NumericPassThrough[T: JSONNumber](PassThrough[T]): - - @property - def supported_filter_relations(self) -> tuple[str, ...]: - return *super().supported_filter_relations, 'within' - - def from_api(self, value: AnyJSON) -> T: - """ - 1.0 is a valid JSONSchema `integer` - - >>> pass_thru_int.from_api(1.0) - 1 - - 1 is a valid JSONSchema `number` - - >>> pass_thru_float.from_api(1) - 1.0 - - 1.1 is not a valid JSONSchema `integer` - - >>> pass_thru_int.from_api(1.1) - Traceback (most recent call last): - ... 
- AssertionError: 1.1 - - 1.1 is a valid JSONSchema `float` - - >>> pass_thru_float.from_api(1.1) - 1.1 - """ - assert isinstance(value, (int, float)) - native_type, = self.native_types - native_value = native_type(value) - assert native_value == value, value - assert isinstance(native_value, native_type) - return cast(T, native_value) - - -pass_thru_str = PassThrough(str, es_type='keyword') -pass_thru_int = NumericPassThrough(int, es_type='long') -pass_thru_float = NumericPassThrough(float, es_type='double') -pass_thru_bool = PassThrough(bool, es_type='boolean') - - -class Nullable[N, X: IndexForm](FieldType[N | None, X], metaclass=ABCMeta): - - def __init__(self, native_type: type[N], translated_from: Form[X]) -> None: - self.native_type: Final[type[N]] = native_type - super().__init__(native_type | None, translated_from) - - @property - def api_schema(self) -> JSON: - return schema.nullable(schema.make(self.native_type)) - - -class NullableScalar[N, X: IndexForm](Nullable[N, X], metaclass=ABCMeta): - - def api_filter_schema(self, relation: str) -> JSON: - if relation == 'within': - # The LHS operand of a range relation can't be null - api_type = schema.make(self.native_type) - return self._api_range_schema(api_type) - else: - return super().api_filter_schema(relation) - - @property - def supported_filter_relations(self) -> tuple[str, ...]: - return *super().supported_filter_relations, 'within' - - -class NullableString(Nullable[str, str]): - # Note that the replacement values for `None` used for each data type - # ensure that `None` values are placed at the end of a sorted list. - null_string = '~null' - es_type = 'keyword' - - def __init__(self): - super().__init__(str, str) - - def to_index(self, value: str | None) -> str: - return self.null_string if value is None else value - - def from_index(self, value: str) -> str | None: - return None if value == self.null_string else value - - -null_str = NullableString() - - -class NullableNumber[U: bool | int | float](NullableScalar[U, JSONNumber]): - shadowed = True - # Maximum int that can be represented as a 64-bit int and double IEEE - # floating point number. This prevents loss when converting between the two. - null_value = sys.maxsize - 1023 - assert null_value == int(float(null_value)) - - def __init__(self, native_type: type[U], es_type: str) -> None: - assert native_type in (bool, int, float) - super().__init__(native_type, JSONNumber) - self._es_type = es_type - - @property - def es_type(self) -> str | None: - return self._es_type - - def to_index(self, value: U | None) -> JSONNumber: - if value is None: - return self.null_value - elif value is False: - return 0 - elif value is True: - return 1 - else: - assert value < self.null_value, (value, self.null_value) - return value - - def from_index(self, value: JSONNumber) -> U | None: - if value == self.null_value: - return None - else: - return self._from_json(value) - - def _from_json(self, value: AnyJSON) -> U | None: - assert isinstance(value, (int, float)) - native_type = self.native_type - native_value = native_type(value) - assert native_value == value, value - assert isinstance(native_value, native_type) - return native_value - - def from_api(self, value: AnyJSON) -> U | None: - """ - 1.0 is a valid JSONSchema `integer` - - >>> null_int.from_api(1.0) - 1 - - 1 is a valid JSONSchema `number` - - >>> pass_thru_float.from_api(1) - 1.0 - - 1.1 is not a valid JSONSchema `integer` - - >>> null_int.from_api(1.1) - Traceback (most recent call last): - ... 
- AssertionError: 1.1 - - 1.1 is a valid JSONSchema `float` - - >>> pass_thru_float.from_api(1.1) - 1.1 - """ - if value is None: - return None - else: - return self._from_json(value) - - -null_int = NullableNumber(int, 'long') -null_float = NullableNumber(float, 'double') - - -class NullableBool(NullableNumber[bool]): - shadowed = False - - def __init__(self): - super().__init__(bool, 'boolean') - - @property - def supported_filter_relations(self) -> tuple[str, ...]: - return 'is', # no point in supporting range relation - - -null_bool = NullableBool() - - -class NullableDateTime(Nullable[str, str]): - es_type = 'date' - null = format_dcp2_datetime(datetime(9999, 1, 1, tzinfo=timezone.utc)) - - def to_index(self, value: str | None) -> str: - if value is None: - return self.null - else: - parse_dcp2_datetime(value) - return value - - def from_index(self, value: str) -> str | None: - if value == self.null: - return None - else: - return value - - -null_datetime: NullableDateTime = NullableDateTime(str, str) - - -class Nested(PassThrough[JSON]): - properties: Mapping[str, FieldType] - agg_property: str - - def __init__(self, **properties): - super().__init__(JSON, es_type='nested') - self.agg_property = first(properties.keys()) - self.properties = properties - - def api_filter_schema(self, relation: str) -> JSON: - assert relation == 'is' - properties, required = {}, [] - for field, field_type in self.properties.items(): - properties[field] = field_type.api_filter_schema(relation) - if not isinstance(field_type, Nullable): - required.append(field) - kwargs: dict[str, AnyJSON] = dict(additionalProperties=False) - if required: - kwargs['required'] = required - return schema.object(properties=properties, **kwargs) - - def filter(self, - relation: str, - values: Iterable[AnyJSON | ApiRange] - ) -> Iterable[JSON | IndexRange[JSON]]: - nested_object = one(values) - assert isinstance(nested_object, dict) - query_filters = {} - for nested_field, nested_value in nested_object.items(): - nested_type = self.properties[nested_field] - to_index = nested_type.to_index - query_filters[nested_field] = to_index(nested_value) - return [query_filters] - - -class ClosedRange[N: PrimitiveJSON, X: IndexForm](FieldType[Range[N], IndexRange[X]]): - - def __init__(self, ends_type: FieldType[N, X]): - super().__init__(reify(Range[N]), reify(JSON)) - self.ends_type = ends_type - - @property - def es_type(self) -> str | None: - return None - - def to_index(self, value: Range[N]) -> IndexRange[X]: - gte, lte = value - to_index = self.ends_type.to_index - return index_range(to_index(gte), to_index(lte)) - - def from_index(self, value: IndexRange[X]) -> Range[N]: - from_index = self.ends_type.from_index - return from_index(value['gte']), from_index(value['lte']) - - @property - def api_schema(self): - return self._api_range_schema(self.ends_type.api_schema) - - @property - def supported_filter_relations(self) -> tuple[str, ...]: - return 'is', 'within', 'contains', 'intersects' - - def api_filter_schema(self, relation: str) -> JSON: - if relation == 'contains': - # A range can contain a range or a value - return schema.union(self.ends_type.api_schema, self.api_schema) - else: - return self.api_schema - - def from_api(self, value: AnyJSON) -> Range[N]: - return self.ends_type._from_api_range(value) - - def filter(self, - relation: str, - values: Iterable[AnyJSON] - ) -> Iterable[IndexRange[X]]: - result = [] - for value in values: - if isinstance(value, list): - pass - elif relation == 'contains' and isinstance(value, 
reify(PrimitiveJSON)): - value = [value, value] - else: - assert False, (relation, value) - result.append(self.to_index(self.from_api(value))) - return result - - -type FieldTypes1 = Mapping[str, FieldTypes1] | Sequence[FieldType] | FieldType -type FieldTypes = Mapping[str, FieldTypes1] -type CataloguedFieldTypes = Mapping[CatalogName, FieldTypes] diff --git a/src/azul/indexer/index_controller.py b/src/azul/indexer/index_controller.py deleted file mode 100644 index dfa4069a7b..0000000000 --- a/src/azul/indexer/index_controller.py +++ /dev/null @@ -1,285 +0,0 @@ -from collections.abc import ( - Iterable, -) -import http -import logging -from typing import ( - Any, -) -import uuid - -import chalice -from chalice.app import ( - SQSRecord, - UnauthorizedError, -) - -from azul import ( - CatalogName, - R, - cached_property, - config, -) -from azul.azulclient import ( - AzulClient, -) -from azul.chalice import ( - LambdaMetric, -) -from azul.hmac import ( - HMACAuthentication, -) -from azul.indexer import ( - BundlePartition, -) -from azul.indexer.action_controller import ( - ActionController, -) -from azul.indexer.index_queue_service import ( - DocumentTally, - IndexAction, - IndexQueueService, -) -from azul.openapi import ( - format_description as fd, - params, - schema, -) -from azul.openapi.responses import ( - json_content, -) -from azul.queues import ( - Queues, - SQSFifoMessage, -) - -log = logging.getLogger(__name__) - - -class IndexController(ActionController[IndexAction]): - - @cached_property - def index_queue_service(self) -> IndexQueueService: - return IndexQueueService() - - @cached_property - def client(self) -> AzulClient: - return AzulClient() - - @property - def actions_are_fifo(self) -> bool: - return False - - def handlers(self) -> dict[str, Any]: - @self.app.route( - '/{catalog}/{action}', - methods=['POST'], - spec={ - 'tags': ['Indexing'], - 'summary': 'Notify the indexer to perform an action on a bundle', - 'description': fd(''' - Queue a bundle for addition to or deletion from the index. - - The request must be authenticated using HMAC via the ``signature`` - header. Each Azul deployment has its own unique HMAC key. The HMAC - components are the request method, request path, and the SHA256 - digest of the request body. - - A valid HMAC header proves that the client is in possession of the - secret HMAC key and that the request wasn't tampered with while - travelling between client and service, even though the latter is not - strictly necessary considering that TLS is used to encrypt the - entire exchange. Internal clients can obtain the secret key from the - environment they are running in, which they share with the service. - External clients must have been given the secret key. The - now-defunct DSS was such an external client. The Azul indexer - provided the HMAC secret to DSS when it registered with DSS to be - notified about bundle additions/deletions. These days only internal - clients use this endpoint.
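- - As a rough illustration (all values elided), the body of a - notification has the shape - - { - "bundle_fqid": { - "uuid": "...", - "version": "...", - "source": {"id": "...", "spec": "..."} - } - }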
- '''), - 'requestBody': { - 'description': 'Contents of the notification', - 'required': True, - **json_content(schema.object( - bundle_fqid=schema.object( - uuid=str, - version=str, - source=schema.object( - id=str, - spec=str - ) - ) - )) - }, - 'parameters': [ - params.path('catalog', - schema.enum(*config.catalogs), - description='The name of the catalog to notify.'), - params.path('action', - schema.enum(IndexAction.add.name, IndexAction.delete.name), - description='Which action to perform.'), - params.header('signature', - str, - description='HMAC authentication signature.') - ], - 'responses': { - '200': { - 'description': 'Notification was successfully queued for processing' - }, - '400': { - 'description': 'Request was rejected due to malformed parameters' - }, - '401': { - 'description': 'Request lacked a valid HMAC header' - } - } - } - ) - def post_notification(catalog: CatalogName, action: str): - """ - Receive a notification event and queue it for indexing or deletion. - """ - return self.handle_notification(catalog, action) - - @self.app.metric_alarm(metric=LambdaMetric.errors, - threshold=int(config.contribution_concurrency(retry=False) * 2 / 3), - period=5 * 60) - @self.app.metric_alarm(metric=LambdaMetric.throttles, - threshold=int(96000 / config.contribution_concurrency(retry=False)), - period=5 * 60) - @self.app.on_sqs_message( - queue=config.notifications_queue.name, - batch_size=1 - ) - def contribute(event: chalice.app.SQSEvent): - self.contribute(event) - - @self.app.metric_alarm(metric=LambdaMetric.errors, - threshold=int(config.aggregation_concurrency(retry=False) * 3), - period=5 * 60) - @self.app.metric_alarm(metric=LambdaMetric.throttles, - threshold=int(37760 / config.aggregation_concurrency(retry=False)), - period=5 * 60) - @self.app.on_sqs_message( - queue=config.tallies_queue.name, - batch_size=Queues.batch_size - ) - def aggregate(event: chalice.app.SQSEvent): - self.aggregate(event) - - # Any messages in the tallies queue that fail being processed will be - # retried with more RAM in the tallies_retry queue. - - @self.app.metric_alarm(metric=LambdaMetric.errors, - threshold=int(config.aggregation_concurrency(retry=True) * 1 / 16), - period=5 * 60) - @self.app.metric_alarm(metric=LambdaMetric.throttles, - threshold=0, - period=5 * 60) - @self.app.on_sqs_message( - queue=config.tallies_queue.to_retry.name, - batch_size=Queues.batch_size - ) - def aggregate_retry(event: chalice.app.SQSEvent): - self.aggregate(event, retry=True) - - # Any messages in the notifications queue that fail being processed will - # be retried with more RAM and a longer timeout in the - # notifications_retry queue. 
- - @self.app.metric_alarm(metric=LambdaMetric.errors, - threshold=int(config.contribution_concurrency(retry=True) * 1 / 4), - period=5 * 60) - @self.app.metric_alarm(metric=LambdaMetric.throttles, - threshold=int(31760 / config.contribution_concurrency(retry=True)), - period=5 * 60) - @self.app.on_sqs_message( - queue=config.notifications_queue.to_retry.name, - batch_size=1 - ) - def contribute_retry(event: chalice.app.SQSEvent): - self.contribute(event, retry=True) - - return locals() - - def handle_notification(self, catalog: CatalogName, action: str): - request = self.current_request - if isinstance(request.authentication, HMACAuthentication): - assert request.authentication.identity() is not None - try: - config.Catalog.validate_name(catalog) - except AssertionError as e: - if R.caused(e): - raise R.propagate(e, chalice.BadRequestError) - notification = request.json_body - log.info('Received notification %r for catalog %r', notification, catalog) - self._validate_notification(notification) - service = self.index_queue_service - message = service.index_bundle_message(self._load_action(action), - catalog, - notification['bundle_fqid'], - BundlePartition.root) - service.queue_notification(message, retry=False) - return chalice.app.Response(body='', status_code=http.HTTPStatus.ACCEPTED) - else: - raise UnauthorizedError() - - def _validate_notification(self, notification): - try: - bundle_fqid = notification['bundle_fqid'] - except KeyError: - raise chalice.BadRequestError('Missing notification entry: bundle_fqid') - - try: - bundle_uuid = bundle_fqid['uuid'] - except KeyError: - raise chalice.BadRequestError('Missing notification entry: bundle_fqid.uuid') - - try: - bundle_version = bundle_fqid['version'] - except KeyError: - raise chalice.BadRequestError('Missing notification entry: bundle_fqid.version') - - if not isinstance(bundle_uuid, str): - raise chalice.BadRequestError(f'Invalid type: uuid: {type(bundle_uuid)} (should be str)') - - if not isinstance(bundle_version, str): - raise chalice.BadRequestError(f'Invalid type: version: {type(bundle_version)} (should be str)') - - if bundle_uuid.lower() != str(uuid.UUID(bundle_uuid)).lower(): - raise chalice.BadRequestError(f'Invalid syntax: {bundle_uuid} (should be a UUID)') - - if not bundle_version: - raise chalice.BadRequestError('Invalid syntax: bundle_version cannot be empty') - - def contribute(self, event: Iterable[SQSRecord], *, retry=False): - self._handle_events(event, self.index_queue_service.contribute) - - def aggregate(self, event: Iterable[SQSRecord], *, retry=False): - # Consolidate multiple tallies for the same entity and process entities - # with only one message. Because SQS FIFO queues try to put as many - # messages from the same message group as possible in a reception batch, - # a single message per group may indicate that that message is the last - # one in the group. Conversely, multiple messages per group in a batch - # are a likely indicator for the presence of even more queued messages - # in that group. The more bundle contributions we defer, the higher the - # amortized savings on aggregation become. Aggregating bundle - # contributions is a costly operation for any entity with many - # contributions, e.g., a large project.
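- # - # As a made-up example, a batch holding three tallies for entity E - # (say 2, 3 and 5 contributions) and one tally for entity F refers F - # for aggregation, while E is deferred as a single consolidated tally - # of 10 contributions.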
- # - tallies = [] - for record in event: - message = SQSFifoMessage.from_record(record) - tally = DocumentTally.from_message(message) - log.info('Attempt %i of handling %i contribution(s) for entity %s, ' - 'message ID %s, group ID %s', - tally.attempts, tally.num_contributions, tally.entity, - message.id, message.group_id) - tallies.append(tally) - try: - self.index_queue_service.aggregate(tallies, retry=retry) - except BaseException: - # Note that another problematic outcome is for the Lambda invocation - # to time out, in which case this log message will not be written. - log.warning('Failed to aggregate tallies: %r', tallies, exc_info=True) - raise diff --git a/src/azul/indexer/index_queue_service.py b/src/azul/indexer/index_queue_service.py deleted file mode 100644 index b4b9d58c36..0000000000 --- a/src/azul/indexer/index_queue_service.py +++ /dev/null @@ -1,357 +0,0 @@ -from collections import ( - defaultdict, -) -from enum import ( - auto, -) -import logging -from typing import ( - Iterable, - Self, - TYPE_CHECKING, - cast, -) - -import attrs - -from azul import ( - CatalogName, - cached_property, - config, - json_mapping, -) -from azul.deployment import ( - aws, -) -from azul.indexer import ( - BundlePartition, - SourceRef, - SourceSpec, -) -from azul.indexer.document import ( - Contribution, - EntityReference, - Replica, -) -from azul.indexer.index_repository_service import ( - IndexRepositoryService, -) -from azul.indexer.index_service import ( - CataloguedEntityReference, - IndexService, -) -from azul.queues import ( - Action, - Queues, - SQSFifoMessage, - SQSMessage, -) -from azul.types import ( - JSON, - json_int, - json_str, -) - -if TYPE_CHECKING: - from mypy_boto3_sqs.service_resource import ( - Queue, - ) - -log = logging.getLogger(__name__) - - -class IndexAction(Action): - reindex = auto() - add = auto() - delete = auto() - - -class IndexQueueService: - - @cached_property - def index_service(self) -> IndexService: - return IndexService() - - @cached_property - def index_repository_service(self) -> IndexRepositoryService: - return IndexRepositoryService() - - @cached_property - def queues(self) -> Queues: - return Queues() - - def notifications_queue(self, *, retry: bool = False) -> 'Queue': - name = config.notifications_queue.derive(retry=retry).name - return aws.sqs_queue(name) - - def tallies_queue(self, *, retry: bool = False) -> 'Queue': - name = config.tallies_queue.derive(retry=retry).name - return aws.sqs_queue(name) - - def queue_notifications(self, - messages: Iterable[SQSMessage], - *, - retry: bool = False - ) -> int: - queue = self.notifications_queue(retry=retry) - return self.queues.send_messages(queue, messages) - - def queue_notification(self, - message: SQSMessage, - *, - retry: bool - ) -> None: - queue = self.notifications_queue(retry=retry) - self.queues.send_message(queue, message) - log.info('Queued notification message %r', message) - - def queue_tallies(self, - messages: Iterable[SQSMessage], - *, - retry: bool = False - ) -> int: - queue = self.tallies_queue(retry=retry) - return self.queues.send_messages(queue, messages) - - def index_bundle_message(self, - action: IndexAction, - catalog: CatalogName, - bundle_fqid: JSON, - bundle_partition: BundlePartition = BundlePartition.root, - ) -> SQSMessage: - return SQSMessage( - body={ - 'action': action.to_json(), - 'catalog': catalog, - 'bundle_fqid': bundle_fqid, - 'bundle_partition': bundle_partition.to_json(), - } - ) - - def index_partition_message(self, - catalog: CatalogName, - source: 
SourceRef, - prefix: str - ) -> SQSMessage: - return SQSMessage( - body={ - 'action': IndexAction.reindex.to_json(), - 'catalog': catalog, - 'source': cast(JSON, source.to_json()), - 'prefix': prefix - } - ) - - def remote_reindex(self, catalog: CatalogName, sources: Iterable[SourceSpec]): - service = self.index_repository_service - plugin = service.repository_plugin(catalog) - for source_spec in sources: - source_ref = plugin.resolve_source(source_spec) - source_ref = plugin.partition_source_for_indexing(catalog, source_ref) - - def message(partition_prefix: str) -> SQSMessage: - log.info('Remotely reindexing prefix %r of source_ref %r into catalog %r', - partition_prefix, str(source_ref.spec), catalog) - return self.index_partition_message(catalog, source_ref, partition_prefix) - - messages = map(message, source_ref.spec.prefix.partition_prefixes()) - self.queue_notifications(messages) - - def remote_reindex_partition(self, message: JSON) -> None: - service = self.index_repository_service - catalog, prefix = message['catalog'], message['prefix'] - assert isinstance(catalog, str) and isinstance(prefix, str) - source = json_mapping(message['source']) - plugin = service.repository_plugin(catalog) - source = plugin.source_ref_cls.from_json(source) - bundle_fqids = service.list_bundles(catalog, source, prefix) - # All AnVIL bundles and entities use the same version - if not config.is_anvil_enabled(catalog): - bundle_fqids = service.filter_obsolete_bundle_versions(bundle_fqids) - log.info('After filtering obsolete versions, ' - '%i bundles remain in prefix %r of source %r in catalog %r', - len(bundle_fqids), prefix, str(source.spec), catalog) - messages = ( - self.index_bundle_message(IndexAction.add, catalog, bundle_fqid.to_json()) - for bundle_fqid in bundle_fqids - ) - num_messages = self.queue_notifications(messages) - log.info('Successfully queued %i notification(s) for prefix %s of ' - 'source %r', num_messages, prefix, source) - - def contribute(self, action: IndexAction, message: JSON): - if action is IndexAction.reindex: - self.remote_reindex_partition(message) - else: - catalog = json_str(message['catalog']) - assert catalog is not None - delete = action is IndexAction.delete - bundle_fqid = json_mapping(message['bundle_fqid']) - bundle_partition = json_mapping(message['bundle_partition']) - bundle_partition = BundlePartition.from_json(bundle_partition) - contributions, replicas = self.transform(catalog, - bundle_fqid, - bundle_partition, - delete=delete) - log.info('Writing %i contributions to index.', len(contributions)) - tallies = self.index_service.contribute(catalog, contributions) - tallies = [DocumentTally.for_entity(catalog, entity, num_contributions) - for entity, num_contributions in tallies.items()] - - if replicas: - if delete: - # FIXME: Replica index does not support deletions - # https://github.com/DataBiosphere/azul/issues/5846 - log.warning('Deletion of replicas is not supported') - else: - log.info('Writing %i replicas to index.', len(replicas)) - num_written = self.index_service.replicate(catalog, replicas) - log.info('Successfully wrote %i replicas', num_written) - else: - log.info('No replicas to write.') - - log.info('Queueing %i entities for aggregating a total of %i contributions.', - len(tallies), sum(tally.num_contributions for tally in tallies)) - messages = (tally.to_message() for tally in tallies) - self.queue_tallies(messages) - - def transform(self, - catalog: CatalogName, - bundle_fqid: JSON, - bundle_partition: BundlePartition, - *, - delete: bool - 
) -> tuple[list[Contribution], list[Replica]]: - """ - Transform the metadata in the bundle referenced by the given - notification into a list of contributions to documents, each document - representing one metadata entity in the index. Replicas of the original, - untransformed metadata are returned as well. - """ - bundle = self.index_repository_service.fetch_bundle(catalog, - bundle_fqid) - results = self.index_service.transform(catalog, - bundle, - bundle_partition, - delete=delete) - if isinstance(results, list): - action = IndexAction.delete if delete else IndexAction.add - for bundle_partition in results: - assert isinstance(bundle_partition, BundlePartition) - # There's a good chance that the partition will also fail in - # the non-retry Lambda function so we'll go straight to retry. - message = self.index_bundle_message(action, - catalog, - bundle_fqid, - bundle_partition) - self.queue_notification(message, retry=True) - return [], [] - elif isinstance(results, tuple): - return results - else: - assert False, results - - #: The number of failed attempts before a tally is referred as a batch of 1. - #: Note that the retry lambda does first attempts, too, namely on re-fed and - #: deferred tallies. - # - num_batched_aggregation_attempts = 3 - - def aggregate(self, tallies: list['DocumentTally'], *, retry: bool): - tallies_by_entity: dict[CataloguedEntityReference, list[DocumentTally]] = defaultdict(list) - for tally in tallies: - tallies_by_entity[tally.entity].append(tally) - deferrals, referrals = [], [] - for tallies in tallies_by_entity.values(): - if len(tallies) == 1: - referrals.append(tallies[0]) - elif len(tallies) > 1: - deferrals.append(tallies[0].consolidate(tallies[1:])) - else: - assert False - if referrals: - for i, tally in enumerate(referrals): - if tally.attempts > self.num_batched_aggregation_attempts: - log.info('Only aggregating problematic entity %s, deferring all others', - tally.entity) - referrals.pop(i) - deferrals.extend(referrals) - referrals = [tally] - break - - log.info('Referring %i tallies', len(referrals)) - tally_by_entity = {} - for tally in referrals: - log.info('Aggregating %i contribution(s) to entity %s', - tally.num_contributions, tally.entity) - tally_by_entity[tally.entity] = tally.num_contributions - - self.index_service.aggregate(tally_by_entity) - - for tally in referrals: - log.info('Successfully aggregated %i contribution(s) to entity %s', - tally.num_contributions, tally.entity) - log.info('Successfully referred %i tallies', len(referrals)) - if deferrals: - log.info('Deferring %i tallies', len(deferrals)) - for tally in deferrals: - log.info('Deferring aggregation of %i contribution(s) to entity %s', - tally.num_contributions, tally.entity) - messages = (tally.to_message() for tally in deferrals) - # Hopefully this is more or less atomic. If we crash below here, - # tallies will be inflated because some or all deferrals have - # been sent and the original tallies will be returned. - self.queue_tallies(messages, retry=retry) - - -@attrs.frozen(kw_only=True) -class DocumentTally: - """ - Tracks the number of bundle contributions to a particular metadata entity. - - Each instance represents a message in the document queue. 
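- - A rough sketch of the round trip: ``for_entity()`` creates a tally with - zero attempts, ``to_message()`` wraps it in an ``SQSFifoMessage`` grouped - by entity, and ``from_message()`` restores it on reception, taking the - number of attempts from the message.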
- """ - entity: CataloguedEntityReference - num_contributions: int - attempts: int - - @classmethod - def for_entity(cls, - catalog: CatalogName, - entity: EntityReference, - num_contributions: int) -> Self: - return cls(entity=CataloguedEntityReference(catalog=catalog, - entity_type=entity.entity_type, - entity_id=entity.entity_id), - num_contributions=num_contributions, - attempts=0) - - @classmethod - def from_message(cls, msg: SQSFifoMessage) -> Self: - return cls.from_json(msg.body, json_int(msg.attempts)) - - @classmethod - def from_json(cls, json: JSON, attempts: int) -> Self: - return cls(entity=CataloguedEntityReference(catalog=json_str(json['catalog']), - entity_type=json_str(json['entity_type']), - entity_id=json_str(json['entity_id'])), - num_contributions=json_int(json['num_contributions']), - attempts=attempts) - - def to_json(self) -> JSON: - return { - 'catalog': self.entity.catalog, - 'entity_type': self.entity.entity_type, - 'entity_id': self.entity.entity_id, - 'num_contributions': self.num_contributions - } - - def to_message(self) -> SQSFifoMessage: - return SQSFifoMessage(body=self.to_json(), - group_id=str(self.entity)) - - def consolidate(self, others: list['DocumentTally']) -> Self: - assert all(self.entity == other.entity for other in others) - num_contributions = sum((other.num_contributions for other in others), - start=self.num_contributions) - return attrs.evolve(self, num_contributions=num_contributions) diff --git a/src/azul/indexer/index_repository_service.py b/src/azul/indexer/index_repository_service.py deleted file mode 100644 index c9f593e87c..0000000000 --- a/src/azul/indexer/index_repository_service.py +++ /dev/null @@ -1,142 +0,0 @@ -from itertools import ( - groupby, -) -import logging -from typing import ( - Iterable, -) - -from azul import ( - CatalogName, - JSON, - cache, -) -from azul.indexer import ( - Bundle, - SourceRef, - SourcedBundleFQID, -) -from azul.plugins import ( - RepositoryPlugin, -) - -log = logging.getLogger(__name__) - - -class IndexRepositoryService: - - @cache - def repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin: - return RepositoryPlugin.load(catalog).create(catalog) - - def list_bundles(self, - catalog: CatalogName, - source: SourceRef, - prefix: str - ) -> list[SourcedBundleFQID]: - plugin = self.repository_plugin(catalog) - log.info('Listing bundles with prefix %r in source %r.', prefix, source) - bundle_fqids = plugin.list_bundles(source, prefix) - log.info('There are %i bundle(s) with prefix %r in source %r.', - len(bundle_fqids), prefix, source) - return bundle_fqids - - def filter_obsolete_bundle_versions(self, - bundle_fqids: Iterable[SourcedBundleFQID] - ) -> list[SourcedBundleFQID]: - """ - Suppress obsolete bundle versions by only taking the latest version for - each bundle UUID. - >>> service = IndexRepositoryService() - >>> service.filter_obsolete_bundle_versions([]) - [] - >>> from azul.indexer import SimpleSourceSpec, SourceRef, Prefix - >>> p = Prefix.parse('/2') - >>> s = SourceRef(id='i', spec=SimpleSourceSpec(prefix=p, name='n')) - >>> def b(u, v): - ... return SourcedBundleFQID(source=s, uuid=u, version=v) - >>> service.filter_obsolete_bundle_versions([ - ... b('c', '0'), - ... b('a', '1'), - ... b('b', '3') - ... 
]) # doctest: +NORMALIZE_WHITESPACE - [SourcedBundleFQID(uuid='c', - version='0', - source=SourceRef(id='i', - spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=2), - name='n'))), - SourcedBundleFQID(uuid='b', - version='3', - source=SourceRef(id='i', - spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=2), - name='n'))), - SourcedBundleFQID(uuid='a', - version='1', - source=SourceRef(id='i', - spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=2), - name='n')))] - >>> service.filter_obsolete_bundle_versions([ - ... b('C', '0'), b('a', '1'), b('a', '0'), - ... b('a', '2'), b('b', '1'), b('c', '2') - ... ]) # doctest: +NORMALIZE_WHITESPACE - [SourcedBundleFQID(uuid='c', - version='2', - source=SourceRef(id='i', - spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=2), - name='n'))), - SourcedBundleFQID(uuid='b', - version='1', - source=SourceRef(id='i', - spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=2), - name='n'))), - SourcedBundleFQID(uuid='a', - version='2', - source=SourceRef(id='i', - spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=2), - name='n')))] - >>> service.filter_obsolete_bundle_versions([ - ... b('a', '0'), b('A', '1') - ... ]) # doctest: +NORMALIZE_WHITESPACE - [SourcedBundleFQID(uuid='A', - version='1', - source=SourceRef(id='i', - spec=SimpleSourceSpec(prefix=Prefix(common='', - partition=2), - name='n')))] - """ - - # Sort lexicographically by source and FQID. I've observed the DSS - # response to already be in this order - def sort_key(fqid: SourcedBundleFQID): - return ( - fqid.source, - fqid.uuid.lower(), - fqid.version.lower() - ) - - bundle_fqids = sorted(bundle_fqids, key=sort_key, reverse=True) - - # Group by source and bundle UUID - def group_key(fqid: SourcedBundleFQID): - return ( - fqid.source.id.lower(), - fqid.uuid.lower() - ) - - groups = groupby(bundle_fqids, key=group_key) - - # Take the first item in each group. 
Because the order is reversed, this - # is the latest version - bundle_fqids = [next(group) for _, group in groups] - return bundle_fqids - - def fetch_bundle(self, catalog: CatalogName, bundle_fqid: JSON) -> Bundle: - plugin = self.repository_plugin(catalog) - bundle_fqid = plugin.bundle_fqid_cls.from_json(bundle_fqid) - return plugin.fetch_bundle(bundle_fqid) diff --git a/src/azul/indexer/index_service.py b/src/azul/indexer/index_service.py deleted file mode 100644 index 30cbf29a41..0000000000 --- a/src/azul/indexer/index_service.py +++ /dev/null @@ -1,976 +0,0 @@ -from collections import ( - Counter, - defaultdict, -) -from collections.abc import ( - Iterable, - Iterator, - Mapping, - Sequence, -) -from itertools import ( - groupby, -) -import logging -from operator import ( - attrgetter, -) -from typing import ( - Any, - TYPE_CHECKING, - cast, -) - -from more_itertools import ( - first, - one, -) -from opensearchpy import ( - ConflictError, - OpenSearchException, -) -from opensearchpy.exceptions import ( - NotFoundError, - RequestError, -) -from opensearchpy.helpers import ( - streaming_bulk, -) - -from azul import ( - CatalogName, - config, -) -from azul.deployment import ( - aws, -) -from azul.es import ( - ESClientFactory, -) -from azul.indexer import ( - Bundle, - BundleFQID, - BundlePartition, - BundleUUID, -) -from azul.indexer.document import ( - Aggregate, - AggregateCoordinates, - CataloguedContribution, - CataloguedEntityReference, - Contribution, - Document, - DocumentCoordinates, - DocumentType, - EntityID, - EntityReference, - EntityType, - IndexName, - OpType, - Replica, - ReplicaCoordinates, -) -from azul.indexer.document_service import ( - DocumentService, -) -from azul.indexer.field import ( - CataloguedFieldTypes, -) -from azul.indexer.transform import ( - Transformer, -) -from azul.json_freeze import ( - freeze, -) -from azul.logging import ( - silenced_es_logger, -) -from azul.types import ( - AnyJSON, - CompositeJSON, - JSON, - JSONs, -) - -log = logging.getLogger(__name__) - -Tallies = Mapping[EntityReference, int] - -CataloguedTallies = Mapping[CataloguedEntityReference, int] - -MutableCataloguedTallies = dict[CataloguedEntityReference, int] - - -class IndexExistsAndDiffersException(Exception): - pass - - -class IndexService(DocumentService): - - def settings(self, index_name: IndexName) -> JSON: - index_name.validate() - aggregate = index_name.doc_type is DocumentType.aggregate - # There is a terminology collision between ElasticSearch's concept of an - # index replica, and our Azul-specific concept of an entity/document - # replica. - replica = index_name.doc_type is DocumentType.replica - catalog = index_name.catalog - assert catalog is not None, catalog - if ( - config.catalogs[catalog].is_integration_test_catalog - or config.deployment.is_unit_test - ): - # The test catalogs are far smaller than non-test catalogs. There is - # no need for the same degree of concurrency as the non-test catalogs. - # Fixing the number of shards also helps keep the order of documents - # in the index deterministic, which helps with writing unit tests, - # e.g. the verbatim PFB manifest tests. - num_shards = 1 - num_replicas = 0 - else: - num_nodes = aws.es_instance_count - num_workers = config.contribution_concurrency(retry=False) - - # Put the sole primary aggregate shard on one node and a replica - # on all others. The reason for just one primary shard is that - # aggregate indices are small and don't need to be sharded.
Each - # shard incurs a significant overhead in ES so we want to - # minimize their number in the absence of overriding concerns - # like optimization for write concurrency. The reason for putting - # a replica on all other nodes is that we do want a full copy of - # each aggregate index on every node so that every node can - # answer client requests without coordinating with other nodes. - # - # Linearly scale the number of contribution shards with the number - # of contribution writers. There was no notable difference in - # speed between factors 1 and 1/4 but the memory pressure was - # unsustainably high with factor 1. In later experiments a factor - # of 1/8 was determined to be preferential, but I don't recall - # the details. We neglected to document our process at the time. - # - # There is no need to replicate the contribution indices because - # their durability does not matter to us as much. If a node goes - # down, we'll just reindex. Since service requests only hit the - # aggregate indices, we can lose all but one node before - # customers are affected. - # - num_shards = 1 if aggregate else max(num_nodes, num_workers // 8) - num_replicas = (num_nodes - 1) if aggregate or replica else 0 - return { - 'index': { - 'number_of_shards': num_shards, - 'number_of_replicas': num_replicas, - 'refresh_interval': f'{config.es_refresh_interval}s' - } - } - - def index_names(self, catalog: CatalogName) -> list[IndexName]: - return [ - IndexName.create(catalog=catalog, - qualifier=entity_type, - doc_type=doc_type) - for entity_type in self.entity_types(catalog) - for doc_type in (DocumentType.contribution, DocumentType.aggregate) - ] + ( - [ - IndexName.create(catalog=catalog, - qualifier=ReplicaCoordinates.index_qualifier, - doc_type=DocumentType.replica) - ] - if config.enable_replicas else - [] - ) - - def index(self, catalog: CatalogName, bundle: Bundle) -> None: - """ - Index the bundle referenced by the given notification into the specified - catalog. This is an inefficient default implementation. A more efficient - implementation would transform many bundles, collect their contributions - and aggregate all affected entities at the end. - """ - transforms = self.deep_transform(catalog, bundle, delete=False) - tallies = {} - for contributions, replicas in transforms: - tallies.update(self.contribute(catalog, contributions)) - self.replicate(catalog, replicas) - self.aggregate(tallies) - - def delete(self, catalog: CatalogName, bundle: Bundle) -> None: - """ - Synchronous form of delete that is currently only used for testing. - - In production code, there is an SQS queue between the calls to - `contribute()` and `aggregate()`. - """ - # FIXME: this only works if the bundle version is not being indexed - # concurrently. The fix could be to optimistically lock on the - # aggregate version (https://github.com/DataBiosphere/azul/issues/611) - transforms = self.deep_transform(catalog, bundle, delete=True) - tallies = {} - for contributions, replicas in transforms: - # FIXME: these are all modified contributions, not new ones. This also - # happens when we reindex without deleting the indices first. The - # tallies refer to number of updated or added contributions but - # we treat them as if they are all new when we estimate the - # number of contributions per bundle. 
- # https://github.com/DataBiosphere/azul/issues/610 - tallies.update(self.contribute(catalog, contributions)) - # FIXME: Replica index does not support deletions - # https://github.com/DataBiosphere/azul/issues/5846 - self.aggregate(tallies) - - def deep_transform(self, - catalog: CatalogName, - bundle: Bundle, - partition: BundlePartition = BundlePartition.root, - *, - delete: bool - ) -> Iterator[tuple[list[Contribution], list[Replica]]]: - """ - Recursively transform the given partition of the specified bundle and - any divisions of that partition. This should be used by synchronous - indexing. The default asynchronous indexing would defer divisions of the - starting partition and schedule a follow-on notification for each of the - divisions. - """ - results = self.transform(catalog, bundle, partition, delete=delete) - result = first(results, None) - if isinstance(result, BundlePartition): - for sub_partition in results: - yield from self.deep_transform(catalog, bundle, sub_partition, delete=delete) - elif isinstance(results, tuple): - yield results - elif result is None: - yield [], [] - else: - assert False, type(result) - - def transform(self, - catalog: CatalogName, - bundle: Bundle, - partition: BundlePartition = BundlePartition.root, - *, - delete: bool, - ) -> list[BundlePartition] | tuple[list[Contribution], list[Replica]]: - """ - Return a list of contributions and a list of replicas for the entities - in the given partition of the specified bundle, or a set of divisions of - the given partition if it contains too many entities. - - :param catalog: the name of the catalog to contribute to - - :param bundle: the bundle to transform - - :param partition: the bundle partition to transform - - :param delete: True, if the bundle should be removed from the catalog. - The resulting contributions will be deletions instead - of additions. 
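For illustration, the divide-or-transform contract can be modeled with plain
string prefixes standing in for partitions; the sketch below is a hypothetical
reduction of `transform` and `deep_transform`, not part of this module::

    from typing import Iterator, Union

    Result = Union[list[str], tuple[list[str], list[str]]]

    def toy_transform(entities: list[str], prefix: str, limit: int = 2) -> Result:
        members = [e for e in entities if e.startswith(prefix)]
        if len(members) > limit:
            # Too many entities: return sub-partitions instead of documents
            return [prefix + d for d in '01']
        else:
            return members, []  # contributions, replicas

    def toy_deep_transform(entities: list[str], prefix: str = ''
                           ) -> Iterator[tuple[list[str], list[str]]]:
        result = toy_transform(entities, prefix)
        if isinstance(result, tuple):
            yield result
        else:
            for sub_prefix in result:
                yield from toy_deep_transform(entities, sub_prefix)

    assert list(toy_deep_transform(['00', '01', '10'])) == [
        (['00', '01'], []),
        (['10'], [])
    ]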
- """ - plugin = self.metadata_plugin(catalog) - bundle.reject_joiner() - transformers = plugin.transformers(bundle, delete=delete) - log.info('Estimating size of partition %s of bundle %s, version %s.', - partition, bundle.uuid, bundle.version) - num_entities = sum(transformer.estimate(partition) for transformer in transformers) - num_divisions = partition.divisions(num_entities) - if num_divisions > 1: - log.info('Dividing partition %s of bundle %s, version %s, ' - 'with %i entities into %i sub-partitions.', - partition, bundle.uuid, bundle.version, num_entities, num_divisions) - return partition.divide(num_divisions) - else: - log.info('Transforming %i entities in partition %s of bundle %s, version %s.', - num_entities, partition, bundle.uuid, bundle.version) - contributions = [] - replicas_by_coords = {} - for transformer in transformers: - for document in transformer.transform(partition): - if isinstance(document, Contribution): - contributions.append(document) - elif isinstance(document, Replica): - try: - dup = replicas_by_coords[document.coordinates] - except KeyError: - replicas_by_coords[document.coordinates] = document - else: - dup.hub_ids.extend(document.hub_ids) - else: - assert False, document - return contributions, list(replicas_by_coords.values()) - - def create_indices(self, catalog: CatalogName): - es_client = ESClientFactory.get() - for index_name in self.index_names(catalog): - while True: - settings = self.settings(index_name) - mappings = self.metadata_plugin(catalog).mapping(index_name) - try: - with silenced_es_logger(): - index = es_client.indices.get(index=str(index_name)) - except NotFoundError: - try: - es_client.indices.create(index=str(index_name), - body=dict(settings=settings, - mappings=mappings)) - except RequestError as e: - if e.error == 'resource_already_exists_exception': - log.info('Another party concurrently created index %s (%r), retrying.', - index_name, index_name) - else: - raise - else: - self._check_index(settings=settings, - mappings=mappings, - index=index[str(index_name)]) - break - - def _check_index(self, *, settings: JSON, mappings: JSON, index: JSON): - - def stringify(value: AnyJSON) -> AnyJSON: - return ( - {k: stringify(v) for k, v in value.items()} - if isinstance(value, dict) else - [stringify(v) for v in value] - if isinstance(value, list) else - str(value) - ) - - def setify(value: CompositeJSON - ) -> set[tuple[str, AnyJSON]] | set[AnyJSON]: - value = freeze(value) - return set( - value.items() - if isinstance(value, Mapping) else - value - ) - - def flatten(value: JSON, *path) -> Iterable[tuple[tuple[str, ...], AnyJSON]]: - for k, v in value.items(): - if isinstance(v, Mapping): - yield from flatten(v, *path, k) - else: - yield (*path, k), v - - # Compare the index settings - expected, actual = ( - setify(dict(flatten(stringify(s)))) - for s in [settings, index['settings']] - ) - if not expected <= actual: - raise IndexExistsAndDiffersException('settings', settings, index['settings']) - - # Compare the static field mapping - key = 'properties' - expected, actual = ( - setify(dict(flatten(m.get(key, {})))) - for m in [mappings, index['mappings']] - ) - if not expected <= actual: - raise IndexExistsAndDiffersException(key, mappings, index['mappings']) - - # Compare the dynamic field mapping - key = 'dynamic_templates' - expected, actual = ( - setify(m.get(key, [])) - for m in [mappings, index['mappings']] - ) - if not expected == actual: - raise IndexExistsAndDiffersException(key, mappings, index['mappings']) - - # Compare the 
rest of the mapping - expected, actual = ( - setify(dict(flatten({ - k: v - for k, v in m.items() - if k not in {'properties', 'dynamic_templates'} - }))) - for m in [mappings, index['mappings']] - ) - if not expected <= actual: - raise IndexExistsAndDiffersException('mappings', mappings, index['mappings']) - - def delete_indices(self, catalog: CatalogName): - es_client = ESClientFactory.get() - for index_name in self.index_names(catalog): - if es_client.indices.exists(index=str(index_name)): - es_client.indices.delete(index=str(index_name)) - - def contribute(self, - catalog: CatalogName, - contributions: list[Contribution] - ) -> CataloguedTallies: - """ - Write the given entity contributions to the index and return tallies, a - dictionary tracking the number of contributions made to each entity. - - Tallies for overwritten documents are not counted. This means a tally - with a count of 0 may exist. This is ok. See description of aggregate(). - """ - tallies = Counter() - writer = self._create_writer(DocumentType.contribution, catalog) - while contributions: - writer.write(contributions) - retry_contributions = [] - for c in contributions: - if c.coordinates in writer.retries: - retry_contributions.append(c) - else: - entity = CataloguedEntityReference.for_entity(catalog, c.coordinates.entity) - # Don't count overwrites, but ensure entry exists - was_overwrite = c.op_type is OpType.index - tallies[entity] += 0 if was_overwrite else 1 - contributions = retry_contributions - writer.raise_on_errors() - return tallies - - def aggregate(self, tallies: CataloguedTallies): - """ - Read all contributions to the entities listed in the given tallies from - the index, aggregate the contributions into one aggregate per entity and - write the resulting aggregates to the index. - - Normally there is a one-to-one correspondence between number of - contributions for an entity and the value for a tally, however tallies - are not counted for updates. This means, in the case of a duplicate - notification or writing over an already populated index, it's possible - to receive a tally with a value of 0. We still need to aggregate (if the - indexed format changed for example). Tallies are a lower bound for the - number of contributions in the index for a given entity. - - Also note that the input tallies can refer to entities from different - catalogs. - """ - # Attempting to filter by an empty array of coordinates while reading - # the aggregates will fail with a 400 error from ElasticSearch. This - # happens when indexing replica bundles for AnVIL, since they emit no - # contributions. - if not tallies: - return - # Use catalog specified in each tally - writer = self._create_writer(DocumentType.aggregate, catalog=None) - while True: - # Read the aggregates - old_aggregates = self._read_aggregates(tallies) - total_tallies: MutableCataloguedTallies = Counter(tallies) - total_tallies.update({ - old_aggregate.coordinates.entity: old_aggregate.num_contributions - for old_aggregate in old_aggregates.values() - }) - - # Read all contributions - contributions = self._read_contributions(total_tallies) - actual_tallies = Counter(contribution.coordinates.entity - for contribution in contributions) - if tallies.keys() != actual_tallies.keys(): - message = 'Could not find all expected contributions.' 
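The invariant enforced at this point is that tallies only ever undercount; a
self-contained sketch of the check, with plain strings standing in for
`CataloguedEntityReference` keys (sample values made up)::

    from collections import Counter

    expected = Counter({'files/f1': 2, 'projects/p1': 0})  # 0 after a duplicate notification
    actual = Counter({'files/f1': 3, 'projects/p1': 1})    # counted from the index

    # Every expected entity must have at least one contribution in the
    # index, and each expected tally may undercount but never overcount.
    assert expected.keys() == actual.keys()
    assert all(expected[e] <= n for e, n in actual.items())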
- args = (tallies, actual_tallies) if config.debug else () - raise EventualConsistencyException(message, *args) - assert all(tallies[entity] <= actual_tally - for entity, actual_tally in actual_tallies.items()) - - # Combine the contributions into new aggregates, one per entity - new_aggregates = self._aggregate(contributions) - - # Remove old aggregates (leaving over only deletions) while - # propagating the expected document version to the corresponding new - # aggregate - for new_aggregate in new_aggregates: - old_aggregate = old_aggregates.pop(new_aggregate.coordinates.entity, None) - new_aggregate.version = None if old_aggregate is None else old_aggregate.version - - # Empty out the left-over, deleted aggregates - for old_aggregate in old_aggregates.values(): - old_aggregate.contents = {} - new_aggregates.append(old_aggregate) - - # Write new aggregates - writer.write(new_aggregates) - - # Retry writes if necessary - if writer.retries: - tallies: CataloguedTallies = { - aggregate.coordinates.entity: tallies[aggregate.coordinates.entity] - for aggregate in new_aggregates - if aggregate.coordinates in writer.retries - } - else: - break - writer.raise_on_errors() - - def replicate(self, catalog: CatalogName, replicas: list[Replica]) -> int: - writer = self._create_writer(DocumentType.replica, catalog) - num_replicas = len(replicas) - num_written = 0 - while replicas: - writer.write(replicas) - retry_replicas = [] - for r in replicas: - if r.coordinates in writer.retries: - retry_replicas.append(r) - else: - num_written += 1 - replicas = retry_replicas - - writer.raise_on_errors() - assert num_written == num_replicas, (num_written, num_replicas) - return num_written - - def _read_aggregates(self, - entities: CataloguedTallies - ) -> dict[CataloguedEntityReference, Aggregate]: - coordinates = [ - AggregateCoordinates(entity=entity) - for entity in entities - ] - request = { - 'docs': [ - { - '_index': coordinate.index_name, - '_id': coordinate.document_id - } - for coordinate in coordinates - ] - } - catalogs = {coordinate.entity.catalog for coordinate in coordinates} - mandatory_source_fields = set() - for catalog in catalogs: - aggregate_cls = self.aggregate_class(catalog) - mandatory_source_fields.update(aggregate_cls.mandatory_source_fields()) - response = ESClientFactory.get().mget(body=request, - _source_includes=list(mandatory_source_fields)) - - def aggregates(): - for doc in response['docs']: - try: - found = doc['found'] - except KeyError: - raise RuntimeError('Malformed document', doc) - else: - if found: - coordinate = DocumentCoordinates.from_hit(doc) - aggregate_cls = self.aggregate_class(coordinate.entity.catalog) - aggregate = aggregate_cls.from_index(self.catalogued_field_types(), - doc, - coordinates=coordinate) - yield aggregate - - return {a.coordinates.entity: a for a in aggregates()} - - def _read_contributions(self, - tallies: CataloguedTallies - ) -> list[CataloguedContribution]: - es_client = ESClientFactory.get() - - entity_ids_by_index: dict[str, set[str]] = defaultdict(set) - for entity in tallies.keys(): - index = str(IndexName.create(catalog=entity.catalog, - qualifier=entity.entity_type, - doc_type=DocumentType.contribution)) - entity_ids_by_index[index].add(entity.entity_id) - - query = { - 'bool': { - 'should': [ - { - 'bool': { - 'must': [ - { - 'term': { - '_index': index - } - }, - { - 'terms': { - 'entity_id.keyword': list(entity_ids) - } - } - ] - } - } for index, entity_ids in entity_ids_by_index.items() - ] - } - } - - index = 
sorted(list(entity_ids_by_index.keys())) - num_contributions = sum(tallies.values()) - log.info('Reading %i expected contribution(s)', num_contributions) - - def pages() -> Iterable[JSONs]: - body = dict(query=query) - while True: - response = es_client.search(index=index, - sort=['_index', 'document_id.keyword'], - body=body, - size=config.contribution_page_size, - track_total_hits=False, - seq_no_primary_term=True) - hits = response['hits']['hits'] - log.debug('Read a page with %i contribution(s)', len(hits)) - if hits: - yield hits - body['search_after'] = hits[-1]['sort'] - else: - break - - contributions = [ - Contribution.from_index(self.catalogued_field_types(), hit) - for hits in pages() - for hit in hits - ] - - log.info('Read %i contribution(s)', len(contributions)) - if log.isEnabledFor(logging.DEBUG): - entity_ref = attrgetter('entity') - contributions_by_entity = cast( - Iterator[tuple[EntityReference, Iterator[Contribution]]], - groupby(sorted(contributions, key=entity_ref), key=entity_ref) - ) - log.debug( - 'Number of contributions read, by entity: %r', - { - f'{entity.entity_type}/{entity.entity_id}': sum(1 for _ in contribution_group) - for entity, contribution_group in contributions_by_entity - } - ) - return contributions - - def _aggregate(self, - contributions: list[CataloguedContribution] - ) -> list[Aggregate]: - # Group contributions by entity and bundle UUID - contributions_by_bundle: Mapping[ - tuple[CataloguedEntityReference, BundleUUID], - list[CataloguedContribution] - ] = defaultdict(list) - tallies: MutableCataloguedTallies = Counter() - for contribution in contributions: - entity = contribution.coordinates.entity - bundle_uuid = contribution.coordinates.bundle.uuid - contributions_by_bundle[entity, bundle_uuid].append(contribution) - # Track the raw, unfiltered number of contributions per entity. 
- assert isinstance(contribution.coordinates.entity, CataloguedEntityReference) - tallies[contribution.coordinates.entity] += 1 - - # For each entity and bundle, find the most recent contribution that is - # not a deletion - contributions_by_entity: dict[ - CataloguedEntityReference, list[CataloguedContribution]] = defaultdict(list) - for (entity, bundle_uuid), contributions in contributions_by_bundle.items(): - contributions = sorted(contributions, - key=attrgetter('coordinates.bundle.version', 'coordinates.deleted'), - reverse=True) - for bundle_version, group in groupby(contributions, key=attrgetter('coordinates.bundle.version')): - contribution: Contribution = next(group) - if not contribution.coordinates.deleted: - assert bundle_uuid == contribution.coordinates.bundle.uuid - assert bundle_version == contribution.coordinates.bundle.version - assert entity == contribution.coordinates.entity - contributions_by_entity[entity].append(contribution) - break - log.info('Selected %i contribution(s) to be aggregated.', - sum(len(contributions) for contributions in contributions_by_entity.values())) - if log.isEnabledFor(logging.DEBUG): - log.debug( - 'Number of contributions selected for aggregation, by entity: %r', - { - f'{entity.entity_type}/{entity.entity_id}': len(contributions) - for entity, contributions in sorted(contributions_by_entity.items()) - } - ) - - # Create lookup for transformer by entity type - transformers: dict[tuple[CatalogName, str], type[Transformer]] = { - (catalog, transformer_cls.entity_type()): transformer_cls - for catalog in config.catalogs - for transformer_cls in self.transformer_types(catalog) - } - - # Aggregate contributions for the same entity - aggregates = [] - for entity, contributions in contributions_by_entity.items(): - transformer = transformers[entity.catalog, entity.entity_type] - contents = self._aggregate_entity(transformer, contributions) - bundles = [ - BundleFQID(uuid=c.coordinates.bundle.uuid, - version=c.coordinates.bundle.version) - for c in contributions - ] - # FIXME: Replace hard coded limit with a config property - # https://github.com/DataBiosphere/azul/issues/3725 - max_bundles = 100 - if len(bundles) > max_bundles: - log.warning('Only aggregating %i out of %i bundles for outer entity %r', - max_bundles, len(bundles), entity) - bundles = bundles[:max_bundles] - sources = set(c.source for c in contributions) - aggregate_cls = self.aggregate_class(entity.catalog) - if TYPE_CHECKING: # work around https://youtrack.jetbrains.com/issue/PY-44728 - aggregate_cls = Aggregate - aggregate = aggregate_cls(coordinates=AggregateCoordinates(entity=entity), - version=None, - sources=sources, - contents=contents, - bundles=bundles, - num_contributions=tallies[entity]) - aggregates.append(aggregate) - - return aggregates - - def _aggregate_entity(self, - transformer: type[Transformer], - contributions: list[Contribution] - ) -> JSON: - contents = self._reconcile(transformer, contributions) - aggregate_contents = {} - inner_entity_types = transformer.inner_entity_types() - inner_entity_counts = [] - for entity_type, entities in contents.items(): - num_entities = len(entities) - if entity_type in inner_entity_types: - assert num_entities <= 1 - inner_entity_counts.append(num_entities) - else: - aggregator = transformer.aggregator(entity_type) - if aggregator is not None: - entities = aggregator.aggregate(entities) - aggregate_contents[entity_type] = entities - if inner_entity_counts: - assert sum(inner_entity_counts) > 0 - return aggregate_contents - - 
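The version-selection step above reduces to a reverse sort followed by taking
the head of each version group; a minimal sketch with `(bundle_version,
deleted)` tuples standing in for contribution coordinates::

    from itertools import groupby
    from operator import itemgetter

    # Contributions for one entity and one bundle UUID; sample data
    contributions = [('2020-01', False), ('2020-02', False), ('2020-02', True)]

    # The reverse sort puts the highest version first and, within a
    # version, the deletion marker ahead of the addition.
    ordered = sorted(contributions, key=itemgetter(0, 1), reverse=True)
    selected = None
    for version, group in groupby(ordered, key=itemgetter(0)):
        head = next(group)
        if not head[1]:
            selected = head  # most recent version that was not deleted
            break

    assert selected == ('2020-01', False)  # version 2020-02 was deleted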
def _reconcile(self, - transformer: type[Transformer], - contributions: Sequence[Contribution], - ) -> Mapping[EntityType, JSONs]: - """ - Given all the contributions to a certain outer entity, reconcile - potentially different copies of the same inner entity in those - contributions. - """ - if len(contributions) == 1: - return one(contributions).contents - else: - result: dict[EntityType, dict[EntityID, tuple[JSON, BundleFQID]]] - result = defaultdict(dict) - for contribution in contributions: - that_bundle = contribution.coordinates.bundle - for entity_type, those_entities in contribution.contents.items(): - these_entities = result[entity_type] - for that_entity in those_entities: - entity_id = transformer.inner_entity_id(entity_type, that_entity) - this = these_entities.get(entity_id, (None, None)) - this_entity, this_bundle = this - that = (that_entity, that_bundle) - if this_entity is None: - these_entities[entity_id] = that - else: - that = transformer.reconcile_inner_entities(entity_type, this=this, that=that) - if this != that: - these_entities[entity_id] = that - return { - entity_type: [entity for entity, _ in entities.values()] - for entity_type, entities in result.items() - } - - def _create_writer(self, - doc_type: DocumentType, - catalog: CatalogName | None - ) -> 'IndexWriter': - # We allow one conflict retry in the case of duplicate notifications and - # switch from 'add' to 'update'. After that, there should be no - # conflicts because we use an SQS FIFO message group per entity. - # Conflicts are common when writing replicas due to entities being - # shared between bundles. For other errors we use SQS message redelivery - # to take care of the retries. - limits = { - DocumentType.contribution: 1, - DocumentType.aggregate: 1, - DocumentType.replica: config.replica_conflict_limit - } - return IndexWriter(catalog, - self.catalogued_field_types(), - refresh=False, - conflict_retry_limit=limits[doc_type], - error_retry_limit=0) - - -class IndexWriter: - - def __init__(self, - catalog: CatalogName | None, - field_types: CataloguedFieldTypes, - refresh: bool | str, - conflict_retry_limit: int, - error_retry_limit: int) -> None: - """ - :param field_types: A mapping of field paths to field type - - :param refresh: https://www.elastic.co/guide/en/elasticsearch/reference/5.5/docs-refresh.html - - :param conflict_retry_limit: The maximum number of retries (the second - attempt is the first retry) on version - conflicts. Specify 0 for no retries or None - for unlimited retries. - - :param error_retry_limit: The maximum number of retries (the second - attempt is the first retry) on other errors. - Specify 0 for no retries or None for - unlimited retries. 
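Callers drive the writer with the same loop regardless of document type,
resubmitting whatever ends up in `retries`. A runnable sketch of that loop,
using a hypothetical stub writer that fails every document exactly once::

    class StubWriter:

        def __init__(self):
            self.retries = set()
            self._seen = set()

        def write(self, docs):
            # Fail each document on first sight, succeed thereafter
            self.retries = {d for d in docs if d not in self._seen}
            self._seen.update(docs)

        def raise_on_errors(self):
            pass

    docs, writer, attempts = ['doc-1', 'doc-2'], StubWriter(), 0
    while docs:
        writer.write(docs)
        docs = [d for d in docs if d in writer.retries]
        attempts += 1
    writer.raise_on_errors()
    assert attempts == 2  # one initial attempt plus one retry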
- """ - super().__init__() - self.catalog = catalog - self.field_types = field_types - self.refresh = refresh - self.conflict_retry_limit = conflict_retry_limit - self.error_retry_limit = error_retry_limit - self.es_client = ESClientFactory.get() - self.errors: dict[DocumentCoordinates, int] = defaultdict(int) - self.conflicts: dict[DocumentCoordinates, int] = defaultdict(int) - self.retries: set[DocumentCoordinates] | None = None - - bulk_threshold = 32 - - def write(self, documents: list[Document]): - """ - Make an attempt to write the documents into the index, updating local - state with failures and conflicts - - :param documents: Documents to index - """ - self.retries = set() - if len(documents) < self.bulk_threshold: - self._write_individually(documents) - else: - self._write_bulk(documents) - - def _write_individually(self, documents: Iterable[Document]): - log.info('Writing documents individually') - for doc in documents: - try: - method = getattr(self.es_client, doc.op_type.name) - method(refresh=self.refresh, **doc.to_index(self.catalog, self.field_types)) - except ConflictError as e: - self._on_conflict(doc, e) - except OpenSearchException as e: - self._on_error(doc, e) - else: - self._on_success(doc) - - def _write_bulk(self, documents: Iterable[Document]): - # FIXME: document this quirk - documents: dict[DocumentCoordinates, Document] = { - doc.coordinates.with_catalog(self.catalog): doc - for doc in documents - } if self.catalog is not None else { - doc.coordinates: doc - for doc in documents - } - - def expand_action(doc: Any) -> tuple[dict[str, Any], dict[str, Any] | None]: - # Document.to_index returns the keyword arguments to the ES client - # method referenced by Document.op_type. In bulk requests, these - # methods are not invoked individually. This function converts the - # keyword arguments returned by Document.to_index to the form - # internally used by the ES client's `bulk` method: a pair - # consisting of 1) the action and associated metadata and 2) an - # optional document source. - assert isinstance(doc, Document), doc - action = dict(doc.to_index(self.catalog, self.field_types)) - action.update(action.pop('params', {})) - action['_index'] = action.pop('index') - action['_id'] = action.pop('id') - body = action.pop('body', None) - action = {doc.op_type.name: action} - return action, body - - log.info('Writing documents using streaming_bulk().') - - # We cannot use parallel_bulk() for 1024+ actions because Lambda doesn't - # support shared memory. See the issue below for details. - # - # https://github.com/DataBiosphere/azul/issues/3200 - # - # Another caveat to keep in mind is that streaming_bulk() may still - # exceed the maximum request size if one or more actions exceed it. - # There is no way to split a single action and hence a single document - # into multiple requests. - # - # Technically, we're not supposed to pass Document instances in the - # `action` parameter but we're exploiting the undocumented fact that the - # method immediately maps the value of the `expand_action_callback` - # parameter over the list passed in the `actions` parameter. 
- response = streaming_bulk(client=self.es_client, - actions=list(documents.values()), - expand_action_callback=expand_action, - refresh=self.refresh, - raise_on_error=False, - max_chunk_bytes=config.max_chunk_size) - for success, info in response: - op_type, info = one(info.items()) - assert op_type in OpType.__members__, op_type - coordinates = DocumentCoordinates.from_hit(info) - doc = documents[coordinates] - if success: - self._on_success(doc) - else: - if info['status'] == 409: - self._on_conflict(doc, info) - else: - self._on_error(doc, info) - - def _on_success(self, doc: Document): - coordinates = doc.coordinates - self.conflicts.pop(coordinates, None) - self.errors.pop(coordinates, None) - if isinstance(doc, Aggregate): - log.debug('Successfully wrote %s with %i contribution(s).', - coordinates, doc.num_contributions) - else: - log.debug('Successfully wrote %s.', coordinates) - - def _on_error(self, doc: Document, e: Exception | JSON): - self.errors[doc.coordinates] += 1 - if self.error_retry_limit is None or self.errors[doc.coordinates] <= self.error_retry_limit: - action = 'retrying' - self.retries.add(doc.coordinates) - else: - action = 'giving up' - log.warning('There was a general error with document %r: %r. Total # of errors: %i, %s.', - doc.coordinates, e, self.errors[doc.coordinates], action, - exc_info=isinstance(e, Exception)) - - def _on_conflict(self, doc: Document, e: Exception | JSON): - self.conflicts[doc.coordinates] += 1 - self.errors.pop(doc.coordinates, None) # a conflict resets the error count - if self.conflict_retry_limit is None or self.conflicts[doc.coordinates] <= self.conflict_retry_limit: - action = 'retrying' - self.retries.add(doc.coordinates) - else: - action = 'giving up' - - def warn(): - log.warning('There was a conflict with document %r: %r. Total # of errors: %i, %s.', - doc.coordinates, e, self.conflicts[doc.coordinates], action) - - if doc.op_type is OpType.create: - try: - doc.op_type = OpType.index - except NotImplementedError: - # We don't expect all Document types will let us modify op_type - warn() - else: - log.warning('Document %r exists. Retrying with overwrite.', doc.coordinates) - else: - warn() - - def raise_on_errors(self): - if self.errors or self.conflicts: - log.warning('Failures: %r', self.errors) - log.warning('Conflicts: %r', self.conflicts) - raise RuntimeError('Failed to index documents. Failures: %i, conflicts: %i.' 
% - (len(self.errors), len(self.conflicts))) - - -class EventualConsistencyException(RuntimeError): - pass diff --git a/src/azul/indexer/lambda_iam_policy.py b/src/azul/indexer/lambda_iam_policy.py deleted file mode 100644 index ac5d53f88e..0000000000 --- a/src/azul/indexer/lambda_iam_policy.py +++ /dev/null @@ -1,167 +0,0 @@ -from azul import ( - config, -) -from azul.collections import ( - alist, -) -from azul.deployment import ( - aws, -) -from azul.terraform import ( - chalice, -) - -direct_access_role = config.dss_direct_access_role('indexer') - -policy = { - 'Version': '2012-10-17', - 'Statement': [ - { - 'Effect': 'Allow', - 'Action': [ - 'logs:CreateLogGroup', - 'logs:CreateLogStream', - 'logs:PutLogEvents' - ], - 'Resource': 'arn:aws:logs:*:*:*' - }, - { - 'Effect': 'Allow', - 'Action': [ - 'es:ESHttpDelete', - 'es:ESHttpGet', - 'es:ESHttpHead', - 'es:ESHttpPut', - 'es:ESHttpPost', - 'es:ESHttpDelete' - ], - 'Resource': f'arn:aws:es:{aws.region_name}:{aws.account}:domain/{config.es_domain}/*' - }, - { - 'Effect': 'Allow', - 'Action': [ - 'es:DescribeElasticsearchDomain' - ], - 'Resource': f'arn:aws:es:{aws.region_name}:{aws.account}:domain/{config.es_domain}' - }, - { - 'Effect': 'Allow', - 'Action': [ - 'sqs:ChangeMessageVisibility*', - 'sqs:DeleteMessage*', - 'sqs:ReceiveMessage', - 'sqs:SendMessage' - ], - 'Resource': [ - f'arn:aws:sqs:{aws.region_name}:{aws.account}:{name}' - for name in config.work_queue_names - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 'sqs:GetQueueAttributes', - 'sqs:GetQueueUrl', - ], - 'Resource': [ - f'arn:aws:sqs:{aws.region_name}:{aws.account}:{name}' - for name in config.all_queue_names - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 'sqs:ListQueues' - ], - 'Resource': [ - f'arn:aws:sqs:{aws.region_name}:{aws.account}:*' - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 'secretsmanager:GetSecretValue' - ], - 'Resource': [ - f'arn:aws:secretsmanager:{aws.region_name}:{aws.account}:secret:*' - ] - }, - *( - [ - { - 'Effect': 'Allow', - 'Action': [ - 's3:GetObject', - ], - 'Resource': [ - f'arn:aws:s3:::{aws.dss_main_bucket(config.dss_endpoint)}/*', - ] - }, - ] if config.dss_endpoint else [] - ), - *( - [ - { - 'Effect': 'Allow', - 'Action': [ - 's3:GetObject', - ], - 'Resource': [ - f'arn:aws:s3:::{aws.logs_bucket}/{prefix}' - for prefix in ( - config.alb_access_log_path_prefix('*', deployment=None), - config.s3_access_log_path_prefix('*', deployment=None), - ) - ] - }, - ] if config.enable_log_forwarding else [] - ), - *( - [ - { - 'Effect': 'Allow', - 'Action': [ - 's3:ListBucket', - 's3:GetObject', - 's3:PutObject', - ], - 'Resource': [ - f'arn:aws:s3:::{resource}' - for bucket in alist(aws.mirror_bucket, config.mirror_bucket) - for resource in [bucket, f'{bucket}/*'] - ] - } - ] if config.enable_mirroring else [] - ), - { - 'Effect': 'Allow', - 'Action': [ - 's3:GetObject', - 's3:PutObject' - ], - 'Resource': [ - '${aws_s3_bucket.%s.arn}/health/*' % config.storage_term, - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 'ssm:GetParameter' - ], - 'Resource': [ - f'arn:aws:ssm:{aws.region_name}:{aws.account}:parameter/dcp/*' - ] - }, - *( - [ - { - 'Effect': 'Allow', - 'Action': 'sts:AssumeRole', - 'Resource': direct_access_role - } - ] if direct_access_role is not None else [ - ] - ), - *chalice.vpc_lambda_iam_policy() - ] -} diff --git a/src/azul/indexer/log_forwarding_controller.py b/src/azul/indexer/log_forwarding_controller.py deleted file mode 100644 index 1907f75a55..0000000000 --- a/src/azul/indexer/log_forwarding_controller.py +++ 
/dev/null @@ -1,47 +0,0 @@ -import json -import sys - -import chalice.app - -from azul import ( - cached_property, -) -from azul.chalice import ( - AppController, -) -from azul.indexer.log_forwarding_service import ( - ALBLogForwardingService, - LogForwardingService, - S3AccessLogForwardingService, -) - - -class LogForwardingController(AppController): - """ - Forward logs from an Application Load Balancer (ALB) or S3 to standard output. - When this behavior is invoked via an AWS Lambda function, the output is - forwarded to the default CloudWatch log group associated with the function. - """ - - @cached_property - def alb(self) -> LogForwardingService: - return ALBLogForwardingService() - - @cached_property - def s3(self) -> LogForwardingService: - return S3AccessLogForwardingService() - - def _forward_logs(self, - event: chalice.app.S3Event, - service: LogForwardingService - ) -> None: - for message in service.read_logs(event.bucket, event.key): - json.dump(message, sys.stdout) - sys.stdout.write('\n') - sys.stdout.flush() - - def forward_alb_logs(self, event: chalice.app.S3Event) -> None: - self._forward_logs(event, self.alb) - - def forward_s3_access_logs(self, event: chalice.app.S3Event) -> None: - self._forward_logs(event, self.s3) diff --git a/src/azul/indexer/log_forwarding_service.py b/src/azul/indexer/log_forwarding_service.py deleted file mode 100644 index ee1e7bec87..0000000000 --- a/src/azul/indexer/log_forwarding_service.py +++ /dev/null @@ -1,183 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -import csv -from datetime import ( - datetime, -) -import gzip -from typing import ( - Iterable, - Iterator, - Sequence, -) -import urllib.parse - -from botocore.response import ( - StreamingBody, -) - -from azul import ( - cached_property, - require, -) -from azul.deployment import ( - aws, -) -from azul.types import ( - MutableJSON, -) - - -class LogForwardingService(metaclass=ABCMeta): - - def read_logs(self, bucket: str, key: str) -> Iterator[MutableJSON]: - response = aws.s3.get_object(Bucket=bucket, Key=key) - body = self._read_log(response['Body']) - for message in self._parse_log_lines(body): - message['_source_bucket'] = bucket - message['_source_key'] = key - yield message - - def _parse_log_lines(self, file_body: Iterable[str]) -> Iterator[MutableJSON]: - # CSV format escapes the quotechar by repeating it. This cannot - # occur in the logs because quotation marks occurring within the - # field values are escaped. AWS does not document how the access - # logs are encoded, but our experiments indicate that characters - # including quotation marks, backslashes, and non-ASCII characters - # are escaped when they occur in access logs. ALB logs use a syntax - # based on the NGINX log format - # (http://nginx.org/en/docs/http/ngx_http_log_module.html#log_format), - # while S3 logs use URL-encoding. For example, quotation marks are - # represented as `\x22` and `%22` in ALB and S3 logs respectively. - for row in csv.reader(file_body, delimiter=' ', quotechar='"'): - # When new fields are introduced, they are added at the end of - # the log entry, so observing more fields than expected does not - # indicate a problem. - require(len(row) >= len(self.fields), 'Missing expected fields') - fields = dict(zip(self.fields, row)) - yield fields - - @abstractmethod - def _read_log(self, response: StreamingBody) -> Iterable[str]: - """ - Read the given body of an object from the log bucket and return the - individual lines contained therein.
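The quoting rules described in `_parse_log_lines` can be exercised with a
made-up three-field format; a minimal, self-contained sketch::

    import csv

    fields = ['time', 'elb_status_code', 'request']
    line = '2024-01-01T00:00:00Z 200 "GET /index.html HTTP/1.1"'
    row = next(csv.reader([line], delimiter=' ', quotechar='"'))
    assert dict(zip(fields, row)) == {
        'time': '2024-01-01T00:00:00Z',
        'elb_status_code': '200',
        'request': 'GET /index.html HTTP/1.1',
    }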
- """ - raise NotImplementedError - - @property - @abstractmethod - def fields(self) -> Sequence[str]: - raise NotImplementedError - - -class ALBLogForwardingService(LogForwardingService): - """ - Parse logs from an Application Load Balancer. - Adapted from https://github.com/rupertbg/aws-load-balancer-logs-to-cloudwatch - """ - - def _read_log(self, response: StreamingBody) -> Iterable[str]: - with gzip.open(response, mode='rt', encoding='ascii') as f: - yield from f - - # https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html#access-log-entry-syntax - @cached_property - def fields(self) -> Sequence[str]: - return [ - 'type', - 'time', - 'elb', - 'client:port', - 'target:port', - 'request_processing_time', - 'target_processing_time', - 'response_processing_time', - 'elb_status_code', - 'target_status_code', - 'received_bytes', - 'sent_bytes', - 'request', - 'user_agent', - 'ssl_cipher', - 'ssl_protocol', - 'target_group_arn', - 'trace_id', - 'domain_name', - 'chosen_cert_arn', - 'matched_rule_priority', - 'request_creation_time', - 'actions_executed', - 'redirect_url', - 'error_reason', - 'target:port_list', - 'target_status_code_list' - ] - - -class S3AccessLogForwardingService(LogForwardingService): - """ - Parse access logs for S3. - """ - - def _read_log(self, response: StreamingBody) -> Iterable[str]: - for line in response.iter_lines(): - # AWS does not document what encoding is used for these log objects. - # Using Latin-1 ensures the log forwarder won't fail due to decoding - # errors, since every octet is a valid Latin-1 character. The - # strings will be re-encoded using UTF-8, so the binary content of - # the CloudWatch log messages will differ from the S3 log if the - # latter contains characters that can't be encoded using UTF-8. This - # decision was made to minimize time spent diagnosing these decoding - # errors. - yield line.decode('latin1') - - def _parse_log_lines(self, file_body: Iterable[str]) -> Iterator[MutableJSON]: - for message in super()._parse_log_lines(file_body): - # For some reason, AWS does not quote the `time` field, - # which contains a space between the seconds and timezone offset. - # All other fields appear to properly use quotes as needed. 
- time = f"{message.pop('time_1')} {message.pop('time_2')}" - # Verify that the restored field matches the expected format - datetime.strptime(time, '[%d/%b/%Y:%H:%M:%S %z]') - message['time'] = time.strip('[]') - # Experiments indicate that the `key` field is url-encoded *twice*, - # e.g., a quotation mark is represented as "%2522" - message['key'] = urllib.parse.unquote(urllib.parse.unquote(message['key'])) - yield message - - @cached_property - def fields(self) -> Sequence[str]: - # https://docs.aws.amazon.com/AmazonS3/latest/userguide/LogFormat.html#log-record-fields - return [ - 'bucket_owner', - 'bucket', - # See comment in `_read_logs` - 'time_1', - 'time_2', - 'remote_ip', - 'requester', - 'request_id', - 'operation', - 'key', - 'request_uri', - 'http_status', - 'error_code', - 'bytes_sent', - 'object_size', - 'total_time', - 'turn_around_time', - 'referer', - 'user_agent', - 'version_id', - 'host_id', - 'signature_version', - 'cipher_suite', - 'authentication_type', - 'host_header', - 'tls_version', - 'access_point_arn', - 'acl_required', - ] diff --git a/src/azul/indexer/mirror_controller.py b/src/azul/indexer/mirror_controller.py deleted file mode 100644 index 49fd0d6b88..0000000000 --- a/src/azul/indexer/mirror_controller.py +++ /dev/null @@ -1,331 +0,0 @@ -from functools import ( - partial, -) -import logging -from typing import ( - Any, - Iterable, - Sequence, - cast, -) - -import chalice -from chalice.app import ( - SQSRecord, -) - -from azul import ( - CatalogName, - R, - cache, - cached_property, - config, -) -from azul.azulclient import ( - AzulClient, - MirrorAction, -) -from azul.chalice import ( - LambdaMetric, -) -from azul.digests import ( - Hasher, - get_resumable_hasher, - hasher_from_str, - hasher_to_str, -) -from azul.indexer import ( - SourceRef, -) -from azul.indexer.action_controller import ( - ActionController, -) -from azul.indexer.mirror_service import ( - FilePart, - MirrorService, -) -from azul.plugins import ( - File, - RepositoryPlugin, -) -from azul.queues import ( - SQSFifoMessage, - SQSMessage, -) -from azul.schemas import ( - SchemaController, -) -from azul.types import ( - JSON, - json_element_strings, - json_mapping, - json_str, -) - -log = logging.getLogger(__name__) - - -class MirrorController(ActionController[MirrorAction], SchemaController): - - @cached_property - def client(self) -> AzulClient: - return AzulClient() - - @property - def actions_are_fifo(self) -> bool: - return True - - @cache - def service(self, catalog: CatalogName) -> MirrorService: - schema_url_func = partial(self.schema_url, facility='mirror') - return MirrorService(catalog=catalog, schema_url_func=schema_url_func) - - def repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin: - return self.client.repository_plugin(catalog) - - def handlers(self) -> dict[str, Any]: - if config.enable_mirroring: - @self.app.metric_alarm(metric=LambdaMetric.errors, - threshold=int(config.mirroring_concurrency * 2 / 3), - period=5 * 60) - @self.app.metric_alarm(metric=LambdaMetric.throttles, - threshold=int(96000 / config.mirroring_concurrency), - period=5 * 60) - @self.app.on_sqs_message(queue=config.mirror_queue.name, - batch_size=1) - def mirror(event: chalice.app.SQSEvent): - self.mirror(event) - - return super().handlers() | locals() - - def mirror(self, event: Iterable[SQSRecord]): - self._handle_events(event, self._mirror) - - def _mirror(self, action: MirrorAction, message: JSON): - if action is MirrorAction.mirror_source: - self.mirror_source(json_str(message['catalog']), 
- json_mapping(message['source'])) - elif action is MirrorAction.mirror_partition: - self.mirror_partition(json_str(message['catalog']), - json_mapping(message['source']), - json_str(message['prefix'])) - elif action is MirrorAction.mirror_file: - self.mirror_file(json_str(message['catalog']), - json_mapping(message['file'])) - elif action is MirrorAction.mirror_part: - self.mirror_file_part(json_str(message['catalog']), - json_mapping(message['file']), - json_mapping(message['part']), - json_str(message['upload_id']), - list(json_element_strings(message['etags'])), - json_str(message['hasher'])) - elif action is MirrorAction.finalize_file: - self.finalize_file(json_str(message['catalog']), - json_mapping(message['file']), - json_str(message['upload_id']), - list(json_element_strings(message['etags'])), - json_str(message['hasher'])) - else: - assert False, action - - def mirror_source(self, catalog: CatalogName, source_json: JSON): - plugin = self.repository_plugin(catalog) - source = plugin.source_ref_cls.from_json(source_json) - source = plugin.partition_source_for_mirroring(catalog, source) - prefix = source.spec.prefix - log.info('Queueing %d partitions of source %r in catalog %r', - prefix.num_partitions, str(source.spec), catalog) - - def message(partition: str) -> SQSMessage: - log.debug('Queueing partition %r', partition) - return self.mirror_partition_message(catalog, source, partition) - - messages = map(message, prefix.partition_prefixes()) - self.client.queue_mirror_messages(messages) - - def mirror_partition(self, - catalog: CatalogName, - source_json: JSON, - prefix: str - ): - plugin = self.repository_plugin(catalog) - source = plugin.source_ref_cls.from_json(source_json) - files = plugin.list_files(source, prefix) - - def messages() -> Iterable[SQSMessage]: - for file in files: - log.debug('Queueing file %r', file) - yield self.mirror_file_message(catalog, source, file) - - self.client.queue_mirror_messages(messages()) - log.info('Queued %d files in partition %r of source %r in catalog %r', - len(files), prefix, str(source), catalog) - - def mirror_file(self, - catalog: CatalogName, - file_json: JSON - ): - file = self.load_file(catalog, file_json) - assert file.size is not None, R('File size unknown', file) - - file_is_large = file.size > 1.5 * 1024 ** 3 - deployment_is_stable = (config.deployment.is_stable - and not config.deployment.is_unit_test - and catalog not in config.integration_test_catalogs) - - service = self.service(catalog) - if file_is_large and not deployment_is_stable: - log.info('Not mirroring file to save cost: %r', file) - elif service.info_exists(file): - log.info('File is already mirrored, skipping upload: %r', file) - elif service.file_exists(file): - assert False, R('File object is already present', file) - else: - part_size = FilePart.default_size - if file.size <= part_size: - log.info('Mirroring file via standard upload: %r', file) - service.mirror_file(file) - log.info('Successfully mirrored file via standard upload: %r', file) - else: - log.info('Mirroring file via multi-part upload: %r', file) - hasher = get_resumable_hasher(file.digest.type) - upload_id = service.begin_mirroring_file(file) - first_part = FilePart.first(file, part_size) - log.info('Uploading part #%d of file %r', first_part.index, file) - etag = service.mirror_file_part(file, - first_part, - upload_id, - hasher) - next_part = first_part.next(file) - assert next_part is not None - log.info('Queueing part #%d of file %r', next_part.index, file) - message = 
self.mirror_part_message(catalog, - file, - next_part, - upload_id, - [etag], - hasher) - self.client.queue_mirror_messages([message]) - - def mirror_file_part(self, - catalog: CatalogName, - file_json: JSON, - part_json: JSON, - upload_id: str, - etags: Iterable[str], - hasher_data: str - ): - file = self.load_file(catalog, file_json) - part = FilePart.from_json(part_json) - hasher = hasher_from_str(hasher_data) - log.info('Uploading part #%d of file %r', part.index, file) - service = self.service(catalog) - etag = service.mirror_file_part(file, part, upload_id, hasher) - etags = [*etags, etag] - next_part = part.next(file) - if next_part is None: - log.info('File fully uploaded in %d parts: %r', len(etags), file) - message = self.finalize_file_message(catalog, - file, - upload_id, - etags, - hasher) - else: - log.info('Queueing part #%d of file %r', next_part.index, file) - message = self.mirror_part_message(catalog, - file, - next_part, - upload_id, - etags, - hasher) - self.client.queue_mirror_messages([message]) - - def finalize_file(self, - catalog: CatalogName, - file_json: JSON, - upload_id: str, - etags: Sequence[str], - hasher_data: str - ): - file = self.load_file(catalog, file_json) - assert len(etags) > 0 - hasher = hasher_from_str(hasher_data) - service = self.service(catalog) - service.finish_mirroring_file(file=file, - upload_id=upload_id, - etags=etags, - hasher=hasher) - log.info('Successfully mirrored file via multi-part upload: %r', file) - - def load_file(self, catalog: CatalogName, file: JSON) -> File: - return self.client.metadata_plugin(catalog).file_class.from_json(file) - - def mirror_partition_message(self, - catalog: CatalogName, - source: SourceRef, - prefix: str - ) -> SQSFifoMessage: - return SQSFifoMessage( - body={ - 'action': MirrorAction.mirror_partition.to_json(), - 'catalog': catalog, - 'source': cast(JSON, source.to_json()), - 'prefix': prefix - }, - group_id=f'{source.id}:{prefix}' - ) - - def mirror_file_message(self, - catalog: CatalogName, - source: SourceRef, - file: File, - ) -> SQSFifoMessage: - return SQSFifoMessage( - body={ - 'action': MirrorAction.mirror_file.to_json(), - 'catalog': catalog, - 'source': cast(JSON, source.to_json()), - 'file': file.to_json() - }, - group_id=file.digest.value - ) - - def mirror_part_message(self, - catalog: CatalogName, - file: File, - part: FilePart, - upload_id: str, - etags: Sequence[str], - hasher: Hasher - ) -> SQSFifoMessage: - return SQSFifoMessage( - body={ - 'catalog': catalog, - 'file': file.to_json(), - 'upload_id': upload_id, - 'action': MirrorAction.mirror_part.to_json(), - 'part': part.to_json(), - 'etags': etags, - 'hasher': hasher_to_str(hasher) - }, - group_id=file.digest.value - ) - - def finalize_file_message(self, - catalog: CatalogName, - file: File, - upload_id: str, - etags: Sequence[str], - hasher: Hasher - ) -> SQSFifoMessage: - return SQSFifoMessage( - body={ - 'catalog': catalog, - 'file': file.to_json(), - 'upload_id': upload_id, - 'action': MirrorAction.finalize_file.to_json(), - 'etags': etags, - 'hasher': hasher_to_str(hasher) - }, - group_id=file.digest.value - ) diff --git a/src/azul/indexer/mirror_service.py b/src/azul/indexer/mirror_service.py deleted file mode 100644 index 9f9d8264ed..0000000000 --- a/src/azul/indexer/mirror_service.py +++ /dev/null @@ -1,386 +0,0 @@ -import json -import logging -import math -import string -import time -from typing import ( - ClassVar, - Protocol, - Self, - Sequence, - TYPE_CHECKING, -) - -import attr -import attrs -from furl import ( - 
furl, -) - -from azul import ( - CatalogName, - JSON, - R, - cached_property, - config, - mutable_furl, -) -from azul.attrs import ( - SerializableAttrs, -) -from azul.auth import ( - Authentication, -) -from azul.deployment import ( - aws, -) -from azul.digests import ( - Hasher, - get_resumable_hasher, -) -from azul.drs import ( - AccessMethod, -) -from azul.http import ( - HasCachedHttpClient, -) -from azul.plugins import ( - File, - RepositoryFileDownload, - RepositoryPlugin, -) -from azul.service.storage_service import ( - StorageObjectNotFound, - StorageService, -) - -if TYPE_CHECKING: - from mypy_boto3_s3.service_resource import ( - MultipartUpload, - ) - -log = logging.getLogger(__name__) - - -@attrs.frozen(kw_only=True) -class FilePart(SerializableAttrs): - """ - A part of a mirrored file. - """ - #: The part number, starting at 0 for the first part, unlike S3 API part - #: numbers, which start at 1. - #: - index: int - - #: Offset of the first byte of this part, relative to the start of the file - offset: int - - #: The size of this part - #: - size: int - - #: Various S3 quotas related to parts and part sizes - #: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html - #: - min_size: ClassVar[int] = 5 * 1024 ** 2 - max_size: ClassVar[int] = 5 * 1024 ** 3 - max_num_parts: ClassVar[int] = 10000 - - #: We observe a download rate of ~14 MB/s. Download time should ideally be - #: 1/4 of the Lambda timeout. Since we track the ETag of each part in SQS - #: messages, message size becomes another constraint: we observe ETags to be - #: 32-byte hexadecimal strings which, if represented in a JSON array, take - #: up 35 bytes per item, 36 if the comma is followed by a space. With a - #: maximum SQS message size of 256 KiB, we can store approximately 7280 - #: ETags in an SQS message, so the largest file we can mirror using a part - #: size of 256 MiB is 1.5 TiB. - #: - default_size: ClassVar[int] = 256 * 1024 ** 2 - - @classmethod - def first(cls, file: File, part_size: int) -> Self: - """ - The first part of the given file, using the given part size. - """ - assert file.size is not None, R( - 'File size unknown', file) - assert cls.min_size <= part_size <= cls.max_size, R( - 'Invalid part size', part_size) - part_count = math.ceil(file.size / part_size) - assert part_count <= cls.max_num_parts, R( - 'Part size is too small for this file', part_size, file) - return cls(index=0, offset=0, size=min(part_size, file.size)) - - def next(self, file: File) -> Self | None: - """ - The part following this part in the given file, or None if this is the - last part.
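The quotas above interact: the SQS message size caps the ETag list and thus
the largest file a chain of parts can cover. A quick, hypothetical check of
the arithmetic behind `default_size`, using the 36-bytes-per-ETag estimate
from the comment::

    default_part_size = 256 * 1024 ** 2    # FilePart.default_size, 256 MiB
    max_message_size = 256 * 1024          # SQS message size limit, 256 KiB
    bytes_per_etag = 36                    # 32 hex chars, quotes, comma, space

    max_etags = max_message_size // bytes_per_etag
    assert max_etags == 7281               # roughly the 7280 quoted above

    # With one ETag per part, the SQS budget, not S3's 10000-part limit,
    # bounds the largest mirrorable file, consistent with the ~1.5 TiB
    # figure quoted above.
    max_file_size = max_etags * default_part_size
    assert max_file_size > 1.5 * 1024 ** 4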
- """ - assert file.size is not None, R('File size unknown', file) - next_offset = self.offset + self.size - if next_offset == file.size: - return None - elif 0 < next_offset < file.size: - next_index = self.index + 1 - next_size = min(self.size, file.size - next_offset) - return attr.evolve(self, index=next_index, offset=next_offset, size=next_size) - else: - assert False, R('Part range exceeds file size', self, file) - - -@attrs.frozen(kw_only=True) -class MirrorFileDownload(RepositoryFileDownload): - _location: str - - @property - def retry_after(self) -> int | None: - return None - - @property - def location(self) -> str | None: - return self._location - - def update(self, - plugin: RepositoryPlugin, - authentication: Authentication | None - ) -> None: - pass - - -@attrs.frozen(kw_only=True, slots=False) -class BaseMirrorService: - catalog: CatalogName - - @cached_property - def _storage(self) -> StorageService: - bucket = config.mirror_bucket - if bucket is None or self.catalog in config.integration_test_catalogs: - bucket = aws.mirror_bucket - return StorageService(bucket) - - def get_mirror_url(self, file: File) -> str: - return self._storage.get_presigned_url(key=self.mirror_object_key(file), - file_name=file.name, - content_type=file.content_type) - - def _get_info(self, file: File) -> JSON | None: - key = self.info_object_key(file) - try: - content = self._storage.get(key) - except StorageObjectNotFound: - return None - else: - json_content = json.loads(content) - content_type = json_content['content-type'] - if content_type != file.content_type: - # FIXME: Content type in mirror info objects inconsistent with index - # https://github.com/DataBiosphere/azul/issues/7193 - log.warning('Conflicting content type %r for file %r', content_type, file) - return json_content - - info_prefix, file_prefix = 'info', 'file' - - def mirror_object_key(self, file: File) -> str: - return self._file_key(self.file_prefix, file) - - def info_object_key(self, file: File) -> str: - return self._file_key(self.info_prefix, file, extension='.json') - - def info_exists(self, file: File) -> bool: - return self._get_info(file) is not None - - def file_exists(self, file: File) -> bool: - try: - self._storage.head(self.mirror_object_key(file)) - except StorageObjectNotFound: - return False - else: - return True - - def delete_it_files(self): - """ - Delete all objects (both file/ and info/) with the given catalog's - mirror prefix. Currently, the mirror prefix is only used to distinguish - IT catalogs from non-IT catalogs, so if an IT catalog is specified, - objects from *all* IT catalogs will be deleted, not just the specified - catalog. 
- """ - assert self.catalog in config.integration_test_catalogs, R( - 'Not an IT catalog', self.catalog) - prefix = self._mirror_prefix - assert len(prefix) > 1 and prefix.endswith('/'), prefix - keys = self._storage.list(prefix) - assert len(keys) <= 300, R('Too many objects', len(keys)) - self._storage.delete(keys, batch_size=100) - - @cached_property - def _mirror_prefix(self) -> str: - return '_it/' if self.catalog in config.integration_test_catalogs else '' - - def _file_key(self, - prefix: str, - file: File, - *, - extension: str = '' - ) -> str: - digest = file.digest - digest_value = digest.value.lower() - assert all(c in string.hexdigits for c in digest_value), R( - 'Expected a hexadecimal digest', digest) - mirror_prefix = self._mirror_prefix - return f'{mirror_prefix}{prefix}/{digest_value}.{digest.type}{extension}' - - -class SchemaUrlFunc(Protocol): - - def __call__(self, - *, - schema_name: str, - version: int - ) -> mutable_furl: ... - - -@attrs.frozen(kw_only=True, slots=False) -class MirrorService(BaseMirrorService, HasCachedHttpClient): - schema_url_func: SchemaUrlFunc - - # We don't store the mirrored files' actual content type(s) in S3's - # `Content-Type` metadata because a single file object may store the - # contents of multiple file metadata entities, which may declare different - # content types for the same data. When file objects are downloaded from the - # mirror bucket via Azul, this value will be overridden with the requested - # file's actual content type via a query parameter in the signed URL. - # - # Files mirrored prior to this change may erroneously specify a different - # value in the `Content-Type` metadata. We haven't found an efficient way to - # update the content type of an existing object without copying its data. - # - file_object_content_type = 'application/octet-stream' - - @cached_property - def repository_plugin(self) -> RepositoryPlugin: - return RepositoryPlugin.load(self.catalog).create(self.catalog) - - def mirror_file(self, file: File): - """ - Upload the file in a single request. For larger files, use - :meth:`begin_mirroring_file` instead. - """ - file_content = self._download(file) - self._storage.put(object_key=self.mirror_object_key(file), - data=file_content, - content_type=self.file_object_content_type, - overwrite=False) - hasher = get_resumable_hasher(file.digest.type) - hasher.update(file_content) - self._verify_digest(file, hasher) - self._put_info(file) - - def begin_mirroring_file(self, file: File) -> str: - """ - Initiate a multipart upload of the file's content and return the upload - ID. - """ - storage = self._storage - key = self.mirror_object_key(file) - upload = storage.create_multipart_upload(object_key=key, - content_type=self.file_object_content_type) - return upload.id - - def mirror_file_part(self, - file: File, - part: FilePart, - upload_id: str, - hasher: Hasher - ) -> str: - """ - Upload a part of a file to a multipart upload begun with - :meth:`begin_mirroring_file` and return the uploaded part's ETag. - The provided hasher is mutated to incorporated the part's content. - """ - upload = self._get_upload(file, upload_id) - file_content = self._download(file, part) - hasher.update(file_content) - return self._storage.upload_multipart_part(file_content, - part.index + 1, - upload) - - def finish_mirroring_file(self, - *, - file: File, - upload_id: str, - etags: Sequence[str], - hasher: Hasher - ): - """ - Complete a multipart upload begun with :meth:`begin_mirroring_file`. 
- """ - upload = self._get_upload(file, upload_id) - self._storage.complete_multipart_upload(upload, - etags, - overwrite=False) - self._verify_digest(file, hasher) - self._get_info(file) - self._put_info(file) - - def info_object(self, file: File) -> JSON: - return { - 'content-type': file.content_type, - '$schema': str(self.schema_url_func(schema_name='info', version=1)) - } - - def _put_info(self, file: File): - key = self.info_object_key(file) - content = self.info_object(file) - self._storage.put(object_key=key, - data=json.dumps(content).encode(), - content_type='application/json') - - def _get_repository_url(self, file: File) -> furl: - assert config.is_tdr_enabled(self.catalog), R( - 'Only TDR catalogs are supported', self.catalog) - assert file.drs_uri is not None, R( - 'File cannot be downloaded', file) - drs = self.repository_plugin.drs_client(authentication=None) - access = drs.get_object(file.drs_uri, AccessMethod.gs) - assert access.method is AccessMethod.https, access - return furl(access.url) - - def _download(self, file: File, part: FilePart | None = None) -> bytes: - download_url = self._get_repository_url(file) - start = time.time() - if part is None: - headers = {} - size = file.size - expected_status = 200 - else: - headers = {'Range': f'bytes={part.offset}-{part.offset + part.size - 1}'} - size = part.size - expected_status = 206 - # Ideally we would stream the response, but boto only supports uploading - # from streams that are seekable. - response = self._http_client.request('GET', - str(download_url), - headers=headers) - if response.status == expected_status: - log.info('Downloaded %d bytes in %.3fs from file %r', - size, time.time() - start, file) - return response.data - else: - raise RuntimeError('Unexpected response from repository', response.status) - - def _get_upload(self, - file: File, - upload_id: str - ) -> 'MultipartUpload': - storage = self._storage - key = self.mirror_object_key(file) - return storage.load_multipart_upload(object_key=key, - upload_id=upload_id) - - def _verify_digest(self, file: File, hasher: Hasher): - expected_digest = file.digest - actual_digest_value = hasher.hexdigest() - assert expected_digest.value == actual_digest_value, R( - 'File digest value does not match its contents', - expected_digest, file) diff --git a/src/azul/indexer/transform.py b/src/azul/indexer/transform.py deleted file mode 100644 index d715e0fa4c..0000000000 --- a/src/azul/indexer/transform.py +++ /dev/null @@ -1,206 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections.abc import ( - Iterable, -) -from typing import ( - Optional, -) - -import attr - -from azul.collections import ( - alist, -) -from azul.indexer import ( - Bundle, - BundleFQID, - BundlePartition, -) -from azul.indexer.aggregate import ( - EntityAggregator, -) -from azul.indexer.document import ( - Contribution, - ContributionCoordinates, - EntityID, - EntityReference, - EntityType, - Replica, - ReplicaCoordinates, -) -from azul.indexer.field import ( - FieldTypes, -) -from azul.json import ( - json_hash, -) -from azul.types import ( - JSON, -) - - -@attr.s(frozen=True, kw_only=True, auto_attribs=True) -class Transformer(metaclass=ABCMeta): - bundle: Bundle - deleted: bool - - @classmethod - @abstractmethod - def entity_type(cls) -> EntityType: - """ - The type of outer entity this transformer creates and aggregates - contributions for. 
- """ - raise NotImplementedError - - def _replica_type(self, entity: EntityReference) -> str: - """ - The name of the type of replica emitted by this transformer for a given - entity. See :py:attr:`Replica.replica_type`. - """ - return entity.entity_type - - @abstractmethod - def _replica_contents(self, entity: EntityReference) -> JSON: - """ - The contents of the replica emitted by this transformer for a given - entity. - """ - raise NotImplementedError - - @classmethod - def inner_entity_types(cls) -> frozenset[str]: - """ - The set of types of inner entities that *do not* require aggregation in - an aggregate for an entity of this transformer's outer entity type. For - any *outer* entity of a certain type there is usually just one *inner* - entity of that same type, eliminating the need to aggregate multiple - inner entities. - """ - return frozenset((cls.entity_type(),)) - - @classmethod - @abstractmethod - def field_types(cls) -> FieldTypes: - raise NotImplementedError - - @abstractmethod - def estimate(self, partition: BundlePartition) -> int: - """ - Return the expected number of contributions that would be returned by - a call to :meth:`transform()`. - """ - - @abstractmethod - def transform(self, - partition: BundlePartition - ) -> Iterable[Contribution | Replica]: - """ - Return the contributions by the current bundle to the entities it - contains metadata about. More than one bundle can contribute to a - particular entity and any such entity can receive contributions by more - than one bundle. Only after all bundles have been transformed, can the - contributions pertaining to a particular entity be aggregated into - a single index document containing exhaustive metadata about that - entity. - - :param partition: The partition of the bundle to return contributions - for. - """ - raise NotImplementedError - - @classmethod - @abstractmethod - def aggregator(cls, entity_type: EntityType) -> Optional[EntityAggregator]: - """ - Returns the aggregator to be used for inner entities of the given type - that occur in contributions to an entity of this transformer's (outer) - entity type. - """ - raise NotImplementedError - - def _contribution(self, - contents: JSON, - entity_id: EntityID - ) -> Contribution: - entity = EntityReference(entity_type=self.entity_type(), entity_id=entity_id) - coordinates = ContributionCoordinates(entity=entity, - bundle=self.bundle.fqid, - deleted=self.deleted) - return Contribution(coordinates=coordinates, - version=None, - source=self.bundle.fqid.source, - contents=contents) - - def _replica(self, - entity: EntityReference, - *, - root_hub: EntityID, - file_hub: EntityID | None, - ) -> Replica: - replica_type = self._replica_type(entity) - contents = self._replica_contents(entity) - coordinates = ReplicaCoordinates(content_hash=json_hash(contents).hexdigest(), - entity=entity) - return Replica(coordinates=coordinates, - version=None, - replica_type=replica_type, - contents=contents, - source=self.bundle.fqid.source, - # The other hubs will be added when the indexer - # consolidates duplicate replicas. - hub_ids=alist(file_hub, root_hub)) - - @classmethod - @abstractmethod - def inner_entity_id(cls, entity_type: EntityType, entity: JSON) -> EntityID: - """ - Return the identifier of the given inner entity. Typically, the - identifier is the value of a particular property of the entity. 
- """ - raise NotImplementedError - - @classmethod - @abstractmethod - def reconcile_inner_entities(cls, - entity_type: EntityType, - *, - this: tuple[JSON, BundleFQID], - that: tuple[JSON, BundleFQID], - ) -> tuple[JSON, BundleFQID]: - """ - Given two potentially different copies of an inner entity, return the - copy that should be incorporated into the aggregate for an outer entity - of this transformer's entity type. Each copy is accompanied by the FQID - of the bundle that contributed it. Typically, the copy from the more - recently updated bundle is returned, but other implementations, such as - merging the two copies are plausible, too. - - :param entity_type: The type of the entity to reconcile - - :param this: One copy of the entity and the bundle it came from - - :param that: Another copy of the entity and the bundle it came from - - :return: The copy to use and the bundle it came from. The return value - may be passed to this method again in case there is yet another - copy to reconcile. In that case, the return value will be - passed as the ``this`` argument. - """ - raise NotImplementedError - - -class ReplicaTransformer(Transformer, metaclass=ABCMeta): - - @classmethod - @abstractmethod - def hot_entity_types(cls) -> dict[EntityType, EntityType]: - """ - The types of entities that do not explicitly track their hubs in - replica documents. Keys describe untransformed entities in a bundle; - values describe transformed inner entities in the index. - """ - raise NotImplementedError diff --git a/src/azul/iterators.py b/src/azul/iterators.py deleted file mode 100644 index 17bbd94808..0000000000 --- a/src/azul/iterators.py +++ /dev/null @@ -1,99 +0,0 @@ -from collections.abc import ( - Iterable, - Iterator, -) -from functools import ( - partial, -) -from itertools import ( - islice, -) -import random -from typing import ( - Callable, - TypeVar, - cast, -) - -from azul import ( - require, -) - -T = TypeVar('T') - - -# noinspection PyPep8Naming -class generable(Iterable[T]): - """ - Convert a generator into a true iterable, i.e. an iterable that is not an - iterator i.e., whose ``__iter__`` does not return ``self`` and that does not - have ``__next__``. - - A generator function: - - >>> def f(n): - ... for i in range(n): - ... yield i - - It returns an iterator that can only be consumed once: - - >>> g = f(3) - >>> list(g) - [0, 1, 2] - >>> list(g) - [] - - Wrapping the generator function with ``generable`` produces a true iterable - that can be consumed multiple times: - - >>> g = generable(f, 3) - >>> list(g) - [0, 1, 2] - >>> list(g) - [0, 1, 2] - """ - - def __init__(self, generator: Callable[..., Iterator[T]], *args, **kwargs): - self._generator = partial(generator, *args, **kwargs) - - def __iter__(self) -> Iterator[T]: - return self._generator() - - -def reservoir_sample(k: int, - it: Iterable[T], - *, - # The cast is safe because the `random` module has a - # function for every method in the `random.Random` class. - random: random.Random = cast(random.Random, random) - ) -> list[T]: - """ - Return a random choice of a given size from an iterable. 
-
-    https://stackoverflow.com/a/35671225/4171119
-
-    >>> r = random.Random(42)
-
-    >>> reservoir_sample(5, '', random=r)
-    []
-
-    >>> reservoir_sample(5, 'abcd', random=r)
-    ['c', 'b', 'd', 'a']
-
-    >>> reservoir_sample(0, 'abcd', random=r)
-    []
-
-    >>> reservoir_sample(5, 'abcdefghijklmnopqrstuvwxyz', random=r)
-    ['x', 'l', 'a', 'n', 'b']
-    """
-    if k == 0:
-        return []
-    require(k > 0, 'Sample size must not be negative', k, exception=ValueError)
-    it = iter(it)
-    sample = list(islice(it, k))
-    random.shuffle(sample)
-    for i, item in enumerate(it, start=k + 1):
-        j = random.randrange(i)
-        if j < k:
-            sample[j] = item
-    return sample
diff --git a/src/azul/json_freeze.py b/src/azul/json_freeze.py
deleted file mode 100644
index 0b82e7f15f..0000000000
--- a/src/azul/json_freeze.py
+++ /dev/null
@@ -1,159 +0,0 @@
-from azul.types import (
-    AnyJSON,
-    AnyMutableJSON,
-)
-from azul.vendored.frozendict import (
-    frozendict,
-)
-
-
-def freeze(x: AnyJSON) -> AnyJSON:
-    """
-    Return a copy of the argument JSON structure with every `dict` in that
-    structure converted to a `frozendict` and every list converted to a tuple.
-
-    Frozen JSON structures are immutable and can be added to sets or used as
-    keys in other dictionaries.
-
-    >>> from copy import deepcopy
-    >>> k = freeze({"1":[2,3]})
-    >>> k_ = deepcopy(k)
-    >>> k is k_
-    False
-    >>> {k: 42}[k_]
-    42
-
-    Freeze is idempotent
-
-    >>> thaw(freeze(freeze({"1":[2,3]})))
-    {'1': [2, 3]}
-    """
-    if isinstance(x, (dict, frozendict)):
-        return frozendict((k, freeze(v)) for k, v in x.items())
-    elif isinstance(x, (list, tuple)):
-        return tuple(freeze(v) for v in x)
-    elif isinstance(x, (bool, str, int, float)) or x is None:
-        return x
-    else:
-        assert False, f'Cannot handle values of type {type(x)}'
-
-
-def thaw(x: AnyJSON) -> AnyMutableJSON:
-    """
-    Return a copy of the argument JSON structure with every `frozendict` in that
-    structure converted to a `dict` and every tuple converted to a list.
-
-    >>> d = {"1":[2, 3]}
-    >>> d_ = thaw(freeze(d))
-    >>> d_ == d, d_ is d
-    (True, False)
-
-    thaw() is idempotent
-
-    >>> thaw(thaw(freeze(d)))
-    {'1': [2, 3]}
-    """
-    if isinstance(x, (frozendict, dict)):
-        return {k: thaw(v) for k, v in x.items()}
-    elif isinstance(x, (tuple, list)):
-        return [thaw(v) for v in x]
-    elif isinstance(x, (bool, str, int, float)) or x is None:
-        return x
-    else:
-        assert False, f'Cannot handle values of type {type(x)}'
-
-
-def sort_frozen(x: AnyJSON) -> AnyJSON:
-    """
-    Attempt to recursively sort a frozen JSON structure. Not all JSON structures
-    are supported. The restrictions are noted below. This method is really only
-    useful when comparing Elasticsearch documents. Elasticsearch's semantics
-    for lists are that the order in which list elements occur doesn't really
-    matter. The "term" query {"foo": "bar"} matches documents with
-    "foo": "bar" and ones with "foo":["baz","bar"].
-
-    >>> sort_frozen(freeze({"2": [{"3": True}, {"4": [5, None, None]}], "1": 1}))
-    (('1', 1), ('2', ((('3', True),), (('4', (None, None, 5)),))))
-
-    Tuples in the frozen JSON must only contain values that are either None or
-    of types that are comparable against each other. All None values in a tuple
-    are put first in the sorted tuple, as if None were less than any other
-    value.
-
-    >>> sort_frozen(freeze([0, ""]))
-    Traceback (most recent call last):
-    ...
-    TypeError: '<' not supported between instances of 'str' and 'int'
-
-    Note that True == 1 and False == 0
-
-    >>> sort_frozen(freeze([1, 0, False]))
-    (0, False, 1)
-
-    >>> sort_frozen(freeze([{'x':True}, {'x': None}]))
-    ((('x', None),), (('x', True),))
-    """
-    if isinstance(x, frozendict):
-        # Note that each key occurs exactly once, so there will be no ties that
-        # have to be broken by comparing the values. The values may be of
-        # heterogeneous types and therefore can't be compared.
-        return tuple(sorted((k, sort_frozen(v)) for k, v in x.items()))
-    elif isinstance(x, tuple):
-        return tuple(sorted((sort_frozen(v) for v in x), key=TupleKey))
-    elif isinstance(x, (bool, str, int, float)) or x is None:
-        return x
-    else:
-        assert False, f'Cannot handle values of type {type(x)}'
-
-
-class TupleKey(object):
-    """
-    Tuples are compared element-wise so (None,) < (True,) involves None < True
-    which fails. To solve this, we wrap all tuple elements. Note that this
-    means recursively wrapping tuple elements that are tuples themselves.
-
-    >>> # noinspection PyTypeChecker
-    ... (None,) < (True,)
-    Traceback (most recent call last):
-    ...
-    TypeError: '<' not supported between instances of 'NoneType' and 'bool'
-
-    >>> TupleKey((None,)) < TupleKey((True,))
-    True
-
-    From https://docs.python.org/3.8/reference/datamodel.html#object.__hash__
-
-    > A class that overrides __eq__() and does not define __hash__() will have
-    > its __hash__() implicitly set to None.
-
-    Just making sure
-
-    >>> {TupleKey((True,)):1}
-    Traceback (most recent call last):
-    ...
-    TypeError: unhashable type: 'TupleKey'
-    """
-    __slots__ = ['obj']
-
-    def __init__(self, obj):
-        if isinstance(obj, tuple):
-            obj = tuple(TupleKey(e) for e in obj)
-        self.obj = obj
-
-    def __lt__(self, other):
-        if self.obj is None:
-            return other.obj is not None
-        else:
-            return other.obj is not None and self.obj < other.obj
-
-    def __gt__(self, other):
-        raise NotImplementedError
-
-    def __eq__(self, other):
-        return self.obj == other.obj
-
-    def __le__(self, other):
-        raise NotImplementedError
-
-    def __ge__(self, other):
-        raise NotImplementedError
diff --git a/src/azul/lambda_layer.py b/src/azul/lambda_layer.py
deleted file mode 100644
index 56fba00ae1..0000000000
--- a/src/azul/lambda_layer.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from collections import (
-    defaultdict,
-)
-import hashlib
-import logging
-from pathlib import (
-    Path,
-)
-import shutil
-import subprocess
-from zipfile import (
-    ZipFile,
-    ZipInfo,
-)
-
-from azul import (
-    cached_property,
-    config,
-)
-from azul.deployment import (
-    aws,
-)
-from azul.files import (
-    file_sha1,
-)
-
-log = logging.getLogger(__name__)
-
-
-class DependenciesLayer:
-
-    @property
-    def s3(self):
-        return aws.s3
-
-    def _update_required(self) -> bool:
-        log.info('Checking for dependencies layer package at s3://%s/%s.',
-                 aws.shared_bucket, self.object_key)
-        try:
-            # Since the object is content-addressed, just checking for the
-            # object's presence is sufficient
-            self.s3.head_object(Bucket=aws.shared_bucket, Key=self.object_key)
-        except self.s3.exceptions.ClientError as e:
-            if e.response['Error']['Code'] == '404':
-                return True
-            else:
-                raise
-        else:
-            return False
-
-    layer_dir = Path(config.project_root) / 'lambdas' / 'layer'
-
-    def update_layer(self):
-        if self._update_required():
-            log.info('Generating new layer package ...')
-            out_dir = self.layer_dir / '.chalice' / 'terraform'
-            self._build_package(out_dir)
-            input_zip = out_dir / 'deployment.zip'
-            output_zip = out_dir /
'layer.zip' - self._filter_package(input_zip, output_zip) - self._validate_layer(output_zip) - log.info('Uploading layer package to S3 ...') - self.s3.upload_file(str(output_zip), aws.shared_bucket, self.object_key) - log.info('Successfully staged updated layer package.') - else: - log.info('Layer package already up-to-date.') - - def _build_package(self, out_dir): - # Delete Chalice's build cache because our layer cache eviction rules - # are stricter and we want a full rebuild. - try: - cache_dir = self.layer_dir / '.chalice' / 'deployments' - log.info('Removing deployment cache at %r', str(cache_dir)) - shutil.rmtree(cache_dir) - except FileNotFoundError: - pass - command = ['chalice', 'package', out_dir] - log.info('Running %r', command) - subprocess.run(command, cwd=self.layer_dir).check_returncode() - - def _filter_package(self, input_zip_path: Path, output_zip_path: Path): - """ - Filter a ZIP file, removing `app.py` and prefixing other archive member - paths with `python/`. - """ - log.info('Filtering %r to %r', str(input_zip_path), str(output_zip_path)) - with ZipFile(input_zip_path, 'r') as input_zip: - with ZipFile(output_zip_path, 'w') as output_zip: - for input in input_zip.infolist(): - if input.filename != 'app.py': - # ZipFile doesn't copy permissions. Setting permissions - # manually also requires setting other fields. - output = ZipInfo(filename='python/' + input.filename) - output.external_attr = input.external_attr - output.date_time = input.date_time - output.compress_type = input.compress_type - with input_zip.open(input, 'r') as rf: - with output_zip.open(output, 'w') as wf: - shutil.copyfileobj(rf, wf, length=1024 * 1024) - - def _validate_layer(self, layer_zip: Path): - with ZipFile(layer_zip, 'r') as z: - infos = z.infolist() - files = defaultdict(list) - for info in infos: - files[info.filename].append(info) - duplicates = {k: v for k, v in files.items() if len(v) > 1} - assert not duplicates, duplicates - - @cached_property - def object_key(self): - sha1 = hashlib.sha1() - for path in Path(config.chalice_bin).iterdir(): - sha1.update(file_sha1(path).encode()) - return f'azul/{config.deployment_stage}/{config.lambda_layer_key}/{sha1.hexdigest()}.zip' diff --git a/src/azul/lambdas.py b/src/azul/lambdas.py deleted file mode 100644 index 4ed12c197a..0000000000 --- a/src/azul/lambdas.py +++ /dev/null @@ -1,188 +0,0 @@ -import ast -import logging -import time -from typing import ( - Optional, - Self, - TYPE_CHECKING, -) - -import attr -from more_itertools import ( - one, -) - -from azul import ( - R, - cache, - config, -) -from azul.deployment import ( - aws, -) -from azul.modules import ( - load_app_module, -) - -if TYPE_CHECKING: - from mypy_boto3_lambda.type_defs import ( - FunctionConfigurationTypeDef, - ) - -log = logging.getLogger(__name__) - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True) -class Lambda: - name: str - role: str - slot_location: Optional[str] - - @property - def is_contribution_lambda(self) -> bool: - for lambda_name in self._contribution_lambda_names(): - try: - # FIXME: Eliminate hardcoded separator - # https://github.com/databiosphere/azul/issues/2964 - resource_name, _ = config.unqualified_resource_name(self.name, - suffix='-' + lambda_name) - except AssertionError as e: - if not R.caused(e): - raise - else: - if resource_name == 'indexer': - return True - return False - - @classmethod - @cache - def _contribution_lambda_names(cls) -> frozenset[str]: - indexer = load_app_module('indexer') - notification_queue_names = { - 
            config.notifications_queue.derive(retry=retry).unqual_name
-            for retry in (False, True)
-        }
-
-        def has_notification_queue(handler) -> bool:
-            try:
-                queue = handler.queue
-            except AttributeError:
-                return False
-            else:
-                resource_name, _, _ = config.unqualified_resource_name_and_suffix(queue)
-                return resource_name in notification_queue_names
-
-        return frozenset((
-            handler.name
-            for handler in vars(indexer).values()
-            if has_notification_queue(handler)
-        ))
-
-    @classmethod
-    def from_response(cls, response: 'FunctionConfigurationTypeDef') -> Self:
-        name = response['FunctionName']
-        role = response['Role']
-        try:
-            slot_location = response['Environment']['Variables']['AZUL_TDR_SOURCE_LOCATION']
-        except KeyError:
-            slot_location = None
-        return cls(name=name,
-                   role=role,
-                   slot_location=slot_location)
-
-    def __attrs_post_init__(self):
-        if self.slot_location is None:
-            assert not self.is_contribution_lambda, self
-        else:
-            allowed_locations = config.tdr_allowed_source_locations
-            assert self.slot_location in allowed_locations, self.slot_location
-
-
-class Lambdas:
-    tag_name = 'azul-original-concurrency-limit'
-
-    @property
-    def _lambda(self):
-        return aws.lambda_
-
-    def list_lambdas(self) -> list[Lambda]:
-        return [
-            Lambda.from_response(function)
-            for response in self._lambda.get_paginator('list_functions').paginate()
-            for function in response['Functions']
-        ]
-
-    def manage_lambdas(self, enabled: bool):
-        paginator = self._lambda.get_paginator('list_functions')
-        lambda_prefixes = [config.qualified_resource_name(lambda_infix) for lambda_infix in config.lambda_names()]
-        assert all(lambda_prefixes)
-        for lambda_page in paginator.paginate(FunctionVersion='ALL', MaxItems=500):
-            for lambda_name in [metadata['FunctionName'] for metadata in lambda_page['Functions']]:
-                if any(lambda_name.startswith(prefix) for prefix in lambda_prefixes):
-                    self.manage_lambda(lambda_name, enabled)
-
-    def manage_lambda(self, lambda_name: str, enable: bool):
-        lambda_settings = self._lambda.get_function(FunctionName=lambda_name)
-        lambda_arn = lambda_settings['Configuration']['FunctionArn']
-        lambda_tags = self._lambda.list_tags(Resource=lambda_arn)['Tags']
-        lambda_name = lambda_settings['Configuration']['FunctionName']
-        if enable:
-            if self.tag_name in lambda_tags.keys():
-                original_concurrency_limit = ast.literal_eval(lambda_tags[self.tag_name])
-
-                if original_concurrency_limit is not None:
-                    log.info(f'Setting concurrency limit for {lambda_name} back to {original_concurrency_limit}.')
-                    self._lambda.put_function_concurrency(FunctionName=lambda_name,
-                                                          ReservedConcurrentExecutions=original_concurrency_limit)
-                else:
-                    log.info(f'Removed concurrency limit for {lambda_name}.')
-                    self._lambda.delete_function_concurrency(FunctionName=lambda_name)
-
-                lambda_arn = lambda_settings['Configuration']['FunctionArn']
-                self._lambda.untag_resource(Resource=lambda_arn, TagKeys=[self.tag_name])
            else:
-                log.warning(f'{lambda_name} is already enabled.')
-        else:
-            if self.tag_name not in lambda_tags.keys():
-                try:
-                    concurrency = lambda_settings['Concurrency']
-                except KeyError:
-                    # If a lambda doesn't have a limit for concurrent
-                    # executions, Lambda.Client.get_function() doesn't return
-                    # a response with the key `Concurrency`.
-                    concurrency_limit = None
-                else:
-                    concurrency_limit = concurrency['ReservedConcurrentExecutions']
-
-                log.info(f'Setting concurrency limit for {lambda_name} to zero.')
-                new_tag = {self.tag_name: repr(concurrency_limit)}
-                self._lambda.tag_resource(Resource=lambda_settings['Configuration']['FunctionArn'], Tags=new_tag)
-                self._lambda.put_function_concurrency(FunctionName=lambda_name, ReservedConcurrentExecutions=0)
-            else:
-                log.warning(f'{lambda_name} is already disabled.')
-
-    def reset_lambda_roles(self):
-        client = self._lambda
-        lambda_names = set(config.lambda_names())
-
-        for lambda_ in self.list_lambdas():
-            for lambda_name in lambda_names:
-                if lambda_.name.startswith(config.qualified_resource_name(lambda_name)):
-                    other_lambda_name = one(lambda_names - {lambda_name})
-                    temporary_role = lambda_.role.replace(
-                        config.qualified_resource_name(lambda_name),
-                        config.qualified_resource_name(other_lambda_name)
-                    )
-                    log.info('Temporarily updating %r to role %r', lambda_.name, temporary_role)
-                    client.update_function_configuration(FunctionName=lambda_.name,
-                                                         Role=temporary_role)
-                    log.info('Updating %r to role %r', lambda_.name, lambda_.role)
-                    while True:
-                        try:
-                            client.update_function_configuration(FunctionName=lambda_.name,
-                                                                 Role=lambda_.role)
-                        except client.exceptions.ResourceConflictException:
-                            log.info('Function %r is being updated. Retrying ...', lambda_.name)
-                            time.sleep(1)
-                        else:
-                            break
diff --git a/src/azul/modules.py b/src/azul/modules.py
deleted file mode 100644
index e02dcb409b..0000000000
--- a/src/azul/modules.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from collections.abc import (
-    Mapping,
-)
-from importlib.abc import (
-    Loader,
-)
-import importlib.util
-import os
-from pathlib import (
-    Path,
-)
-from typing import (
-    Any,
-)
-
-from azul import (
-    R,
-    config,
-)
-from azul.types import (
-    not_none,
-)
-
-
-def load_module(path: str, module_name: str):
-    """
-    Load a module from the .py file at the given path without affecting
-    `sys.path` or `sys.modules`.
-
-    :param path: the file system path to the module file
-                 (typically ending in .py)
-
-    :param module_name: the value to assign to the __name__ attribute of the
-                        module.
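-
-    For example (an illustrative sketch; the path and module name are
-    hypothetical)::
-
-        module = load_module('/path/to/foo.py', 'foo')
-        module.some_function()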
-
-    :return: the module
-    """
-    spec = importlib.util.spec_from_file_location(module_name, path)
-    assert spec is not None, R('Unable to load module', module_name, path)
-    assert isinstance(spec.loader, Loader)
-    module = importlib.util.module_from_spec(spec)
-    setattr(module, _loaded_dynamically, True)
-    assert Path(path).samefile(not_none(module.__file__))
-    assert module.__name__ == module_name
-    spec.loader.exec_module(module)
-    return module
-
-
-def load_app_module(lambda_name):
-    path = os.path.join(config.project_root, 'lambdas', lambda_name, 'app.py')
-    # Changing the module name here will break doctest discoverability
-    return load_module(path, f'lambdas.{lambda_name}.app')
-
-
-def load_script(script_name: str):
-    path = os.path.join(config.project_root, 'scripts', f'{script_name}.py')
-    return load_module(path, script_name)
-
-
-_loaded_dynamically = '__azul_loaded_dynamically__'
-
-
-def module_loaded_dynamically(module_globals: Mapping[str, Any]) -> bool:
-    """
-    Determine if a module was loaded dynamically
-
-    :param module_globals: The return value of globals() when invoked from
-                           within the module in question
-
-    :return: True, if the module with the given globals was loaded dynamically
-             via a facility in this module, False otherwise
-    """
-    return module_globals.get(_loaded_dynamically, False)
diff --git a/src/azul/oauth2.py b/src/azul/oauth2.py
deleted file mode 100644
index 2b1c116b8b..0000000000
--- a/src/azul/oauth2.py
+++ /dev/null
@@ -1,188 +0,0 @@
-from abc import (
-    ABCMeta,
-    abstractmethod,
-)
-from collections.abc import (
-    Sequence,
-)
-import json
-import logging
-from typing import (
-    TYPE_CHECKING,
-    TypedDict,
-)
-
-import attr
-from furl import (
-    furl,
-)
-from google.auth.transport.urllib3 import (
-    AuthorizedHttp,
-)
-from google.oauth2.credentials import (
-    Credentials as TokenCredentials,
-)
-from google.oauth2.service_account import (
-    Credentials as ServiceAccountCredentials,
-)
-import urllib3.request
-
-from azul import (
-    R,
-    cached_property,
-    config,
-    reject,
-    require,
-)
-from azul.http import (
-    HasCachedHttpClient,
-    HttpClientDecorator,
-)
-
-log = logging.getLogger(__name__)
-
-ScopedCredentials = ServiceAccountCredentials | TokenCredentials
-
-
-class CredentialsProvider(metaclass=ABCMeta):
-
-    @abstractmethod
-    def scoped_credentials(self) -> ScopedCredentials:
-        raise NotImplementedError
-
-    @abstractmethod
-    def oauth2_scopes(self) -> Sequence[str]:
-        raise NotImplementedError
-
-
-class TokenInfo(TypedDict):
-    azp: str  # "713613812354-aelk662bncv14d319dk8juce9p11um00.apps.googleusercontent.com",
-    aud: str  # "713613812354-aelk662bncv14d319dk8juce9p11um00.apps.googleusercontent.com",
-    sub: str  # "105096702580025601450",
-    scope: str  # "https://www.googleapis.com/auth/userinfo.email openid",
-    exp: str  # "1689645319",
-    expires_in: str  # "3511",
-    email: str  # "hannes@ucsc.edu",
-    email_verified: str  # "true",
-    access_type: str  # "online"
-
-
-@attr.s(auto_attribs=True, kw_only=True, frozen=True)
-class OAuth2Client(HasCachedHttpClient):
-    credentials_provider: CredentialsProvider
-
-    @property
-    def credentials(self) -> ScopedCredentials:
-        return self.credentials_provider.scoped_credentials()
-
-    @property
-    def service_account_credentials(self) -> ServiceAccountCredentials:
-        credentials = self.credentials
-        assert isinstance(credentials, ServiceAccountCredentials), R(
-            'Expecting service account credentials', type(credentials)
-        )
return credentials - - # The AuthorizedHttp class declares the second constructor argument to be a - # PoolManager instance but, except for __del__, doesn't actually use methods - # from the latter, only those from RequestMethods, at least in the scenarios - # we use AuthorizedHttp in. The AuthorizedHttp.__del__ method calls `clear` - # on the wrapped instance, so this adapter only provides that. - # - if TYPE_CHECKING: - _PoolManagerAdapter = urllib3.PoolManager - else: - class _PoolManagerAdapter(HttpClientDecorator): - - def clear(self): - pass - - def _create_http_client(self) -> urllib3.request.RequestMethods: - """ - A urllib3 HTTP client with OAuth 2.0 credentials - """ - # By default, AuthorizedHTTP attempts to refresh the credentials on a - # 401 response, which is never helpful. When using service account - # credentials, a fresh token is obtained for every lambda invocation, - # which will never persist long enough for the token to expire. User - # tokens can expire, but attempting to refresh them raises - # `google.auth.exceptions.RefreshError` due to the credentials not being - # configured with (among other fields) the client secret. - # - return AuthorizedHttp(self.credentials, - self._PoolManagerAdapter(super()._create_http_client()), - refresh_status_codes=()) - - @cached_property - def _http_client_without_credentials(self) -> urllib3.request.RequestMethods: - """ - A urllib3 HTTP client for making unauthenticated requests - """ - return super()._create_http_client() - - def validate(self): - """ - Validate the credentials from the provider this client was initialized - with. Raises an exception if the credentials are invalid, or if their - validity cannot be determined. - - For a user's access token to be valid, it must not be expired, and - originate from a Google OAuth 2.0 client belonging to the current - Google Cloud project. - - For service account credentials (those with a private key) to be valid, - the associated access token must not be expired and the email associated - with the token must be that of the service account itself. - - For a service account's access token (a bare access token created from - the service account's private key by some other party) to be valid, the - token must not be expired and the service account must belong to the - current Google Cloud project. 
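-
-        In all three cases, validation works by submitting the access token to
-        Google's ``tokeninfo`` endpoint and inspecting the claims in the
-        response.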
- - :raise RequirementError: if the token is definitely invalid - - :raise Exception: if the validity of the token cannot be determined - """ - credentials = self.credentials - url = furl(url='https://www.googleapis.com/oauth2/v3/tokeninfo', - args=dict(access_token=credentials.token)) - response = self._http_client_without_credentials.request('GET', str(url)) - reject(response.status == 400, - 'The token is not valid') - require(response.status == 200, - 'Unexpected response status', response.status) - token_info: TokenInfo = json.loads(response.data) - # The error messages here intentionally lack detail, for confidentiality - if isinstance(credentials, ServiceAccountCredentials): - # Actual service account credentials - require(token_info['email_verified'] == 'true', - 'Service account email is not verified') - require(token_info['email'] == credentials.service_account_email, - 'Service account email does not match') - elif isinstance(credentials, TokenCredentials): - authorized_party = token_info['azp'] - email = token_info.get('email') - if authorized_party.endswith('.apps.googleusercontent.com'): - # A user's access token originating from an OAuth 2.0 client - azul_client_id = config.google_oauth2_client_id - reject(azul_client_id is None, - 'Acceptance of OAuth 2.0 user access tokens is disabled') - project_id = self._project_id_from_client_id(azul_client_id) - authorized_project_id = self._project_id_from_client_id(authorized_party) - require(project_id == authorized_project_id, - 'OAuth 2.0 client project does not match') - elif email is not None and email.endswith('.iam.gserviceaccount.com'): - # A service account's bare access token - require(token_info['email_verified'] == 'true', - 'Service account email is not verified') - local_part, _, host = email.partition('@') - host, _, domain = host.partition('.') - require(host == config.google_project(), - 'Service account project does not match') - else: - assert False, 'Unexpected type of authorized party' - else: - assert False, type(credentials) - - def _project_id_from_client_id(self, client_id): - return client_id.split('-', 1)[0] diff --git a/src/azul/objects.py b/src/azul/objects.py deleted file mode 100644 index a05c4c3bd7..0000000000 --- a/src/azul/objects.py +++ /dev/null @@ -1,71 +0,0 @@ -from azul import ( - lru_cache, -) - - -class InternMeta(type): - """ - A metaclass that interns instances of its instances such that the invariant - (x == y) == (x is y) holds for all instances x and y of any instance of this - metaclass. Note that an instance of a metaclass is a class. - - This metaclass does not consider thread safety. It should be as safe or - unsafe as lru_cache from functools. - - Note also that this metaclass never releases the memory used by instances of - its instances. - - >>> from typing import ClassVar - - >>> class C(metaclass=InternMeta): - ... i: ClassVar[int] = 0 - ... def __init__(self, x:int): - ... C.i += 1 - ... self.x = x - - >>> C(1) is C(1) - True - >>> C.i - 1 - >>> C(1) is C(2) - False - >>> C.i - 2 - - Instances of an instance of this metaclass should be immutable. - - >>> from dataclasses import dataclass, field - >>> @dataclass - ... class D(metaclass=InternMeta): - ... x: int - >>> d1, d2 = D(1), D(2) - >>> d1 == d2 - False - >>> d2.x = 1 # make them equal - >>> d1 == d2 - True - >>> d1 is d2 # but they are still not the same, violating the invariant. - False - - Instances of an instance are interned based on the arguments they were - constructed with. 
That means that instance equality must be consistent with - the equality of the construction arguments. If it isn't i.e., if two - instances are equal even if their construction arguments are not, the - invariant will be violated. - - >>> @dataclass - ... class E(metaclass=InternMeta): - ... x: int - ... y: int = field(compare=False) - >>> e1, e2 = E(1, 1), E(1, 2) - >>> e1.y == e2.y # Even though .y is differs between instances … - False - >>> e1 == e2 # they are considered equal because .y is insignificant for equality. - True - >>> e1 is e2 # Invariant is invalidated. - False - """ - - @lru_cache - def __call__(cls, *args, **kwargs): - return super().__call__(*args, **kwargs) diff --git a/src/azul/plugins/__init__.py b/src/azul/plugins/__init__.py deleted file mode 100644 index 280d6f20c7..0000000000 --- a/src/azul/plugins/__init__.py +++ /dev/null @@ -1,894 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from enum import ( - Enum, -) -import importlib -from inspect import ( - isabstract, -) -from typing import ( - AbstractSet, - Callable, - ClassVar, - Iterable, - Mapping, - Self, - Sequence, - TYPE_CHECKING, - TypeVar, - TypedDict, - cast, -) - -import attrs -from more_itertools import ( - one, -) - -from azul import ( - CatalogName, - cached_property, - config, -) -from azul.attrs import ( - SerializableAttrs, -) -from azul.chalice import ( - Authentication, -) -from azul.digests import ( - Digest, -) -from azul.drs import ( - DRSClient, -) -from azul.indexer import ( - Bundle, - Prefix, - SourceRef, - SourceSpec, - SourcedBundleFQID, -) -from azul.indexer.document import ( - Aggregate, - DocumentType, - EntityType, - FieldPath, - FieldPathElement, - IndexName, -) -from azul.indexer.transform import ( - ReplicaTransformer, - Transformer, -) -from azul.types import ( - JSON, - MutableJSON, - MutableJSONs, - derived_type_params, - json_str, -) -from azul.uuids import ( - validate_uuid_prefix, -) - -if TYPE_CHECKING: - from azul.service.elasticsearch_service import ( - AggregationStage, - FilterStage, - ) - # These are only needed for type hints and would otherwise introduce a - # circular import since the service layer heavily depends on the plugin. - from azul.service.repository_service import ( - SearchResponseStage, - SummaryResponseStage, - ) - -FieldName = str - -FieldMapping = Mapping[FieldName, FieldPath] - -ColumnMapping = Mapping[FieldPathElement, FieldName | None] -ManifestConfig = Mapping[FieldPath, ColumnMapping] -MutableColumnMapping = dict[FieldPathElement, FieldName] -MutableManifestConfig = dict[FieldPath, MutableColumnMapping] - -DottedFieldPath = str -FieldGlobs = list[DottedFieldPath] - - -def dotted(path_or_element: FieldPathElement | FieldPath, - *elements: FieldPathElement - ) -> DottedFieldPath: - dot = '.' - if isinstance(path_or_element, FieldPathElement): - # The dotted('field') case is pointless, so we won't special-case it - return dot.join((path_or_element, *elements)) - elif elements: - return dot.join((*path_or_element, *elements)) - else: - return dot.join(path_or_element) - - -class DocumentSlice(TypedDict, total=False): - """ - Also known in Elasticsearch land as a *source filter*, but that phrase has - a different meaning in Azul. 
-
-    https://www.elastic.co/guide/en/elasticsearch/reference/7.10/search-fields.html#source-filtering
-    """
-    includes: FieldGlobs
-    excludes: FieldGlobs
-
-
-@attrs.frozen(auto_attribs=True, kw_only=True)
-class Sorting:
-    field_name: FieldName
-    descending: bool = attrs.field(default=False)
-    max_page_size: int = 1000
-
-    @property
-    def order(self) -> str:
-        return 'desc' if self.descending else 'asc'
-
-
-@attrs.frozen(auto_attribs=True, kw_only=True)
-class SpecialFields:
-    """
-    Azul defines a number of fields in each /index/{entity_type} response that
-    are synthetic (not directly taken from the metadata) and/or are used
-    internally. Their naming is inconsistent between metadata plugin
-    implementations. This class encapsulates the naming of these fields so that
-    we don't need to litter the source with string literals and conditionals.
-
-    It is an incomplete abstraction in that it does not express the name of the
-    inner entity the field is a property of in the /index/{entity_type}
-    response. In that way, the values of the attributes of instances of this
-    class are more akin to a facet name, rather than a field name. However, not
-    every field represented here is actually a facet.
-    """
-    accessible: ClassVar[FieldName] = 'accessible'
-    source_id: FieldName
-    source_spec: FieldName
-    bundle_uuid: FieldName
-    bundle_version: FieldName
-    root_entity_id: FieldName
-
-
-class ManifestFormat(Enum):
-    compact = 'compact'
-    terra_pfb = 'terra.pfb'
-    curl = 'curl'
-    verbatim_jsonl = 'verbatim.jsonl'
-    verbatim_pfb = 'verbatim.pfb'
-
-
-class Plugin[BUNDLE: Bundle](metaclass=ABCMeta):
-    """
-    A base class for Azul plugins. Concrete plugins shouldn't inherit this
-    class directly but one of the subclasses of this class. This class just
-    defines the mechanism for loading concrete plugin classes and doesn't
-    specify any interface to the concrete plugin itself.
-    """
-
-    @classmethod
-    def load(cls, catalog: CatalogName) -> type[Self]:
-        """
-        Load and return one of the concrete subclasses of the class this method
-        is called on. Which concrete class is returned depends on how the
-        catalog is configured. Different catalogs can use different combinations
-        of concrete plugin implementations.
-
-        :param catalog: the name of the catalog for which to load the plugin
-        """
-        assert cls != Plugin, f'Must use a subclass of {cls.__name__}'
-        assert isabstract(cls), f'Must use an abstract subclass of {cls.__name__}'
-        plugin_type_name = cls._plugin_type_name()
-        plugin_package_name = config.catalogs[catalog].plugins[plugin_type_name].name
-        return cls._load(plugin_type_name, plugin_package_name)
-
-    @classmethod
-    def types(cls) -> Sequence[type[Self]]:
-        return cls.__subclasses__()
-
-    @classmethod
-    def type_for_name(cls, plugin_type_name: str) -> type[Self]:
-        """
-        Return the plugin type for the given name.
-
-        Note that the returned class is still abstract. To get a concrete
-        implementation of a particular plugin type, call the :meth:`.load`
-        method of the class returned by this method. The need to call this
-        method is uncommon. Depending on the purpose, say, interacting with
-        the repository, a client usually knows the abstract type of plugin
-        they'd like to use, i.e., :class:`RepositoryPlugin`. The only thing
-        they don't know is which concrete implementation of that class to
-        use, as that depends on the catalog.
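-
-        For example (an illustrative sketch; ``catalog`` is assumed to be a
-        valid catalog name)::
-
-            plugin_cls = Plugin.type_for_name('repository')
-            plugin = plugin_cls.load(catalog).create(catalog)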
- """ - for subclass in cls.types(): - if subclass.type_name() == plugin_type_name: - return subclass - raise ValueError('No such plugin type', plugin_type_name) - - @classmethod - @abstractmethod - def type_name(cls) -> str: - raise NotImplementedError - - @classmethod - def bundle_cls(cls, - plugin_package_name: str - ) -> type[BUNDLE]: - plugin_type_name = cls._plugin_type_name() - plugin_cls = cls._load(plugin_type_name, plugin_package_name) - bundle_cls = derived_type_params(plugin_cls, root=Plugin)[BUNDLE] - assert isinstance(bundle_cls, type) - assert issubclass(bundle_cls, Bundle), bundle_cls - return cast(type[BUNDLE], bundle_cls) - - @classmethod - def _plugin_type_name(cls) -> str: - assert cls != Plugin, f'Must use a subclass of {cls.__name__}' - assert isabstract(cls) != Plugin, f'Must use an abstract subclass of {cls.__name__}' - plugin_type_name = cls.type_name() - return plugin_type_name - - @classmethod - def _load(cls, plugin_type_name: str, plugin_package_name: str) -> type[Self]: - plugin_package_path = f'{__name__}.{plugin_type_name}.{plugin_package_name}' - plugin_module = importlib.import_module(plugin_package_path) - plugin_cls = getattr(plugin_module, 'Plugin') - assert issubclass(plugin_cls, cls) - return plugin_cls - - -class MetadataPlugin[BUNDLE: Bundle](Plugin[BUNDLE]): - - @classmethod - def type_name(cls) -> str: - return 'metadata' - - # If the need arises to parameterize instances of a concrete plugin class, - # add the parameters to create() and make it abstract. - - @classmethod - def create(cls) -> Self: - return cls() - - @abstractmethod - def transformer_types(self) -> Iterable[type[Transformer]]: - raise NotImplementedError - - @abstractmethod - def transformers(self, - bundle: BUNDLE, - *, - delete: bool - ) -> Iterable[Transformer]: - """ - Instantiate all transformer classes. - - :param bundle: the bundle to initialize the transformers with - - :param delete: whether the bundle was deleted - """ - raise NotImplementedError - - def aggregate_class(self) -> type[Aggregate]: - """ - Returns the concrete class to use for representing aggregate documents - in the indexer. - """ - return Aggregate - - @property - def string_mapping(self): - return { - 'type': 'text', - 'fields': { - 'keyword': { - 'type': 'keyword', - 'ignore_above': 256 - } - } - } - - range_mapping = { - # A float (single precision IEEE-754) can represent all integers up to - # 16,777,216. If we used float values for organism ages in seconds, we - # would not be able to accurately represent an organism age of - # 16,777,217 seconds. That is 194 days and 15617 seconds. - # A double precision IEEE-754 representation loses accuracy at - # 9,007,199,254,740,993 which is more than 285616415 years. - - # Note that Python's float uses double precision IEEE-754. - # (https://docs.python.org/3/tutorial/floatingpoint.html#representation-error) - 'type': 'double_range' - } - - def mapping(self, index_name: IndexName) -> MutableJSON: - return { - 'numeric_detection': False, - 'properties': { - # Declare the primary key since it's used as the tiebreaker when - # sorting. We used to use _uid for that but that's gone in ES 7 and - # _id can't be used for sorting: - # - # https://www.elastic.co/guide/en/elasticsearch/reference/current/breaking-changes-7.0.html#uid-meta-field-removed - # - # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html - # - # > The _id field is restricted from use in aggregations, sorting, - # > and scripting. 
In case sorting or aggregating on the _id field - # > is required, it is advised to duplicate the content of the _id - # > field into another field that has doc_values enabled. - # - 'entity_id': self.string_mapping, - **( - { - 'contents': { - # All replicas are stored in a single index per catalog, - # regardless of entity type, resulting in heterogeneous - # documents. Additionally, we don't want ES re-ordering - # arrays or dictionary items within replica documents. - # Therefore, we disable the mapping for their contents. - 'type': 'object', - 'enabled': False - } - } - if index_name.doc_type is DocumentType.replica else - {} - ) - }, - 'dynamic_templates': [ - { - 'strings_as_text': { - 'match_mapping_type': 'string', - 'mapping': self.string_mapping - } - }, - { - 'other_types_with_keyword': { - 'match_mapping_type': '*', - 'mapping': { - 'type': '{dynamic_type}', - 'fields': { - 'keyword': { - 'type': '{dynamic_type}' - } - } - } - } - } - ] - } - - @property - @abstractmethod - def exposed_indices(self) -> dict[EntityType, Sorting]: - """ - The indices for which the service provides an `/index/…` endpoint. - The return value maps the outer entity type of each exposed index to the - default values of the request parameters that control the paging and - ordering of hits returned by the corresponding endpoint. - """ - raise NotImplementedError - - @property - @abstractmethod - def manifest_formats(self) -> Sequence[ManifestFormat]: - """ - The supported formats for generating a manifest. The first value will be - used as a default if no format is explicitly specified. - """ - raise NotImplementedError - - #: See :meth:`_field_mapping` - _FieldMapping2 = Mapping[FieldPathElement, FieldName] - _FieldMapping1 = Mapping[FieldPathElement, FieldName | _FieldMapping2] - _FieldMapping = Mapping[FieldPathElement, FieldName | _FieldMapping1] - - @cached_property - def field_mapping(self) -> FieldMapping: - """ - Maps a field's name in the service response to the field's path in - Elasticsearch index documents. - """ - - def invert(v: MetadataPlugin._FieldMapping, - *path: FieldPathElement - ) -> Iterable[tuple[FieldName, FieldPath]]: - if isinstance(v, dict): - for k, v in v.items(): - assert isinstance(k, FieldPathElement) - yield from invert(v, *path, k) - elif isinstance(v, FieldName): - yield v, path - else: - assert False, v - - inversion: dict[FieldName, FieldPath] = {} - for v, path in invert(self._field_mapping): - other_path = inversion.setdefault(v, path) - assert other_path == path, ( - f'Field {v!r} has conflicting paths', path, other_path - ) - return inversion - - @property - @abstractmethod - def _field_mapping(self) -> _FieldMapping: - """ - An inverted and more compact representation of the field mapping. It is - made up of nested dictionaries where each key is an element in a field's - path whereas the corresponding value is either the field's name, if the - key represents the element in the path, or a dictionary otherwise. - """ - raise NotImplementedError - - @property - @abstractmethod - def special_fields(self) -> SpecialFields: - """ - See :py:class:`SpecialFields`. - """ - raise NotImplementedError - - @property - def root_entity_type(self) -> str: - """ - The type of entity that sits at the root of the entity graph, and that - all other entities are directly or indirectly associated with. 
- Typically, entities of other types are thought of as *belonging to* the - root entity and this relationship is implied rather than made explicit - via a foreign key or some other manifestation of a graph connection. The - mere presence of a `project` entity in a TDR snapshot for HCA, for - example, implies that all other entities in that snapshot *belong* to - that project. - """ - raise NotImplementedError - - @property - def hot_entity_types(self) -> Iterable[str]: - """ - The types of inner entities that do not explicitly track their hubs in - replica documents in order to avoid a large list of hub references in - the replica document, and to avoid contention when updating that list - during indexing. This will always include the root type. - """ - replica_transformer_type = one( - t for t in self.transformer_types() - if issubclass(t, ReplicaTransformer) - ) - hot_entity_types = replica_transformer_type.hot_entity_types().values() - assert self.root_entity_type in hot_entity_types - return hot_entity_types - - @property - def facets(self) -> Sequence[str]: - return [self.special_fields.source_id] - - @property - @abstractmethod - def manifest_config(self) -> ManifestConfig: - raise NotImplementedError - - def verbatim_pfb_entity_id(self, replica: JSON) -> str: - return json_str(replica['entity_id']) - - def verbatim_pfb_schema(self, replicas: list[JSON]) -> list[JSON]: - """ - Generate the azul-specific parts of the PFB schema for the verbatim - manifest. The default, metadata-agnostic implementation loads all - replica documents into memory and dynamically generates a schema based - on their observed shapes. This results in inconsistencies in the schema - depending on the manifest contents, so subclasses should override this - method if their metadata adheres to an authoritative schema that can be - known in advance. - - :param replicas: The replica documents to be described by the PFB schema - - :return: a list of PFB entity schemas describing the replicas - """ - from azul.service import ( - avro_pfb, - ) - return avro_pfb.pfb_schema_from_replicas(replicas) - - def verbatim_pfb_relations(self, replica: JSON) -> list[tuple[str, str]]: - """ - A list of the replicas that the given replica references/depends on, - represented as (replica_type, entity_id) pairs. - """ - return [] - - def verbatim_pfb_links(self, replica_type: str) -> MutableJSONs: - """ - Express the relationships of the given replica type as PFB links - (https://uc-cdis.github.io/pypfb/#link). 
- """ - return [] - - @abstractmethod - def document_slice(self, entity_type: str) -> DocumentSlice | None: - raise NotImplementedError - - @property - @abstractmethod - def summary_response_stage(self) -> 'type[SummaryResponseStage]': - raise NotImplementedError - - @property - @abstractmethod - def search_response_stage(self) -> 'type[SearchResponseStage]': - raise NotImplementedError - - @property - @abstractmethod - def summary_aggregation_stage(self) -> 'type[AggregationStage]': - raise NotImplementedError - - @property - @abstractmethod - def aggregation_stage(self) -> 'type[AggregationStage]': - raise NotImplementedError - - @property - @abstractmethod - def filter_stage(self) -> 'type[FilterStage]': - raise NotImplementedError - - @property - @abstractmethod - def file_class(self) -> type['File']: - raise NotImplementedError - - -@attrs.frozen(auto_attribs=True, kw_only=True) -class RepositoryPlugin[BUNDLE: Bundle, - SOURCE_SPEC: SourceSpec, - SOURCE_REF: SourceRef, - BUNDLE_FQID: SourcedBundleFQID]( - Plugin[BUNDLE] -): - catalog: CatalogName - - @classmethod - def type_name(cls) -> str: - return 'repository' - - @classmethod - def create(cls, catalog: CatalogName) -> Self: - """ - Return a plugin instance suitable for populating the given catalog. - """ - return cls(catalog=catalog) - - @cached_property - def sources(self) -> AbstractSet[SOURCE_SPEC]: - """ - The sources the plugin is configured to read metadata from. - """ - return frozenset(map(self.parse_source, config.sources(self.catalog))) - - def _assert_source(self, source: SOURCE_REF): - """ - Assert that the given source is present in the plugin configuration. - """ - assert source.spec.prefix is not None, source - for configured_spec in self.sources: - if configured_spec == source.spec: - break - # Most configured sources lack an explicit prefix - elif configured_spec.eq_ignoring_prefix(source.spec): - assert configured_spec.prefix is None, (configured_spec, source) - break - else: - continue - else: - assert False, (self.sources, source) - - def _assert_partition(self, source: SOURCE_REF, prefix: str): - """ - Assert that the given partition is a valid derivation of the given - source's configured prefix. - """ - validate_uuid_prefix(prefix) - assert prefix in source.spec.prefix, (source.spec, prefix) - - @abstractmethod - def list_sources(self, - authentication: Authentication | None - ) -> Iterable[SOURCE_REF]: - """ - The sources the plugin is configured to read metadata from that are - accessible using the provided authentication. Retrieving this - information may require a round-trip to the underlying repository. - Implementations should raise PermissionError if the provided - authentication is insufficient to access the repository. - """ - raise NotImplementedError - - def list_source_ids(self, - authentication: Authentication | None - ) -> set[str]: - """ - List source IDs in the underlying repository that are accessible using - the provided authentication. Sources may be included even if they are - not configured to be read from. Subclasses should override this method - if it can be implemented more efficiently than `list_sources`. - - Retrieving this information may require a round-trip to the underlying - repository. Implementations should raise PermissionError if the provided - authentication is insufficient to access the repository. 
- """ - return {source.id for source in self.list_sources(authentication)} - - @cached_property - def _generic_params(self) -> dict[TypeVar, type]: - params = derived_type_params(type(self), root=RepositoryPlugin) - assert all(isinstance(p, type) for p in params.values()), params - return cast(dict[TypeVar, type], params) - - @property - def source_ref_cls(self) -> type[SOURCE_REF]: - ref_cls = self._generic_params[SOURCE_REF] - assert isinstance(ref_cls, type), ref_cls - assert issubclass(ref_cls, SourceRef), ref_cls - return cast(type[SOURCE_REF], ref_cls) - - @property - def bundle_fqid_cls(self) -> type[BUNDLE_FQID]: - fqid_cls = self._generic_params[BUNDLE_FQID] - assert isinstance(fqid_cls, type), fqid_cls - assert issubclass(fqid_cls, SourcedBundleFQID), fqid_cls - return cast(type[BUNDLE_FQID], fqid_cls) - - def parse_source(self, spec: str) -> SOURCE_SPEC: - return self.source_ref_cls.spec_cls().parse(spec) - - def resolve_source(self, spec: SOURCE_SPEC) -> SOURCE_REF: - """ - Return an instance of :class:`SourceRef` for the repository source - matching the given specification or raise an exception if no such source - exists. - """ - ref_cls = self.source_ref_cls - spec_cls = ref_cls.spec_cls() - assert isinstance(spec, spec_cls), spec - id = self._lookup_source_id(spec) - return ref_cls(id=id, spec=spec) - - @abstractmethod - def _lookup_source_id(self, spec: SOURCE_SPEC) -> str: - """ - Return the ID of the repository source with the specified name or raise - an exception if no such source exists. - """ - raise NotImplementedError - - @abstractmethod - def count_bundles(self, source: SOURCE_SPEC) -> int: - """ - The total number of subgraphs in the given source. The source's prefix - may be None, indicating that the source hasn't been partitioned yet and - that this method should count all bundles in the source. - """ - raise NotImplementedError - - @abstractmethod - def count_files(self, source: SOURCE_SPEC) -> int: - """ - The total number of files in the given source. The source's prefix - may be None, indicating that the source hasn't been partitioned yet and - that this method should count all files in the source. - """ - raise NotImplementedError - - def partition_source_for_indexing(self, - catalog: CatalogName, - source: SOURCE_REF - ) -> SOURCE_REF: - """ - If the source already has a prefix, return the source. Otherwise, return - an updated copy of the source with a heuristically computed prefix that - should be appropriate for indexing in the given catalog. - """ - return self._partition_source(catalog, source, self.count_bundles) - - def partition_source_for_mirroring(self, - catalog: CatalogName, - source: SOURCE_REF - ) -> SOURCE_REF: - """ - If the source already has a prefix, return the source. Otherwise, return - an updated copy of the source with a heuristically computed prefix that - should be appropriate for mirroring in the given catalog. 
- """ - return self._partition_source(catalog, source, self.count_files) - - def _partition_source(self, - catalog: CatalogName, - source: SOURCE_REF, - counter: Callable[[SOURCE_SPEC], int] - ) -> SOURCE_REF: - if source.spec.prefix is None: - count = counter(source.spec) - is_main = config.deployment.is_main - is_it = catalog in config.integration_test_catalogs - # We use the "lesser" heuristic during IT to keep the cost and - # performance of the tests within reasonable limits - if is_main and not is_it: - prefix = Prefix.for_main_deployment(count) - else: - prefix = Prefix.for_lesser_deployment(count) - return source.with_prefix(prefix) - else: - return source - - @abstractmethod - def list_bundles(self, - source: SOURCE_REF, - prefix: str - ) -> list[BUNDLE_FQID]: - """ - List the bundles in the given source whose UUID starts with the given - prefix. - - :param source: a reference to the repository source that contains the - bundles to list - - :param prefix: a string that should be no more than eight lower-case - hexadecimal characters - """ - - raise NotImplementedError - - @abstractmethod - def fetch_bundle(self, bundle_fqid: BUNDLE_FQID) -> BUNDLE: - """ - Fetch the given bundle. - - :param bundle_fqid: The fully qualified ID of the bundle to fetch, - including its source. - """ - raise NotImplementedError - - @abstractmethod - def list_files(self, source: SOURCE_REF, prefix: str) -> list['File']: - """ - List the files in the given source whose digest value starts with the - given prefix. - - :param source: A reference to the repository source that contains the - files to list - - :param prefix: A string of lower-case hexadecimal characters - """ - - raise NotImplementedError - - @abstractmethod - def drs_client(self, - authentication: Authentication | None = None - ) -> DRSClient: - """ - Returns a DRS client that uses the given authentication with requests to - the DRS server. If a concrete subclass doesn't support authentication, - it should assert that the argument is ``None``. - """ - raise NotImplementedError - - @abstractmethod - def file_download_class(self) -> type['RepositoryFileDownload']: - raise NotImplementedError - - @abstractmethod - def validate_version(self, version: str) -> None: - """ - Raise ValueError if the given version string is invalid. - """ - raise NotImplementedError - - -@attrs.frozen(auto_attribs=True, kw_only=True) -class File(SerializableAttrs, metaclass=ABCMeta): - """ - A reference to a data file in the repository. - """ - - #: The UUID of the data file. Some plugins use the same UUID for the - #: file's metadata document. - uuid: str - - #: The name of the file on the user's disk. - name: str - - #: Optional version of the file. Defaults to the most recent version. - version: str | None - - #: The DRS URI of the file in the repository from which to download the - #: file. - #: - #: https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/#_drs_ids - #: - #: Repository plugins that populate the DRS URI (``azul.indexer.Bundle. - #: drs_uri``) usually require this to be set. Plugins that don't will - #: ignore this. - drs_uri: str | None - - #: The file's size on disk, if known. - size: int | None = None - - #: The file's MIME content type, if known - content_type: str | None = None - - @classmethod - @abstractmethod - def from_hit(cls, hit: JSON) -> Self: - """ - Instantiate this class from an entity aggregate document retrieved from - Elasticsearch. 
- """ - raise NotImplementedError - - @property - @abstractmethod - def digest(self) -> Digest: - raise NotImplementedError - - -@attrs.define(auto_attribs=True, kw_only=True) -class RepositoryFileDownload(metaclass=ABCMeta): - #: The file being downloaded - file: File - - #: True if the download of a file requires its DRS URI - needs_drs_uri: ClassVar[bool] = False - - #: The name of the replica to download the file from. Defaults to the name - #: of the default replica. The set of valid replica names depends on the - #: repository, but each repository must support the default replica. - replica: str | None - - #: A token to capture download state in. Should be `None` when the download - #: is first requested. - token: str | None - - @abstractmethod - def update(self, - plugin: RepositoryPlugin, - authentication: Authentication | None - ) -> None: - """ - Initiate the preparation of a URL from which the file can be downloaded. - Set any attributes that are None to their default values. If a download - is already being prepared, update those attributes and set the - `retry_after` property. If the download has been prepared, set the - `location` property. - - :param plugin: The plugin for the repository from which the file is to - be downloaded. - - :param authentication: The authentication provided with the download - request. - """ - raise NotImplementedError - - @property - @abstractmethod - def location(self) -> str | None: - """ - The final URL from which the file contents can be downloaded. - """ - raise NotImplementedError - - @property - @abstractmethod - def retry_after(self) -> int | None: - """ - A number of seconds to wait before calling `update` again. - """ diff --git a/src/azul/plugins/metadata/__init__.py b/src/azul/plugins/metadata/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py deleted file mode 100644 index 487582e4b8..0000000000 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ /dev/null @@ -1,537 +0,0 @@ -from collections import ( - defaultdict, -) -from operator import ( - itemgetter, -) -from typing import ( - Iterable, - Self, - Sequence, -) - -from attrs import ( - frozen, -) -from more_itertools import ( - one, -) -from more_itertools.more import ( - always_iterable, -) - -from azul import ( - config, - iif, -) -from azul.digests import ( - Digest, -) -from azul.indexer.document import ( - DocumentType, - EntityType, - FieldPath, - IndexName, -) -from azul.plugins import ( - DocumentSlice, - File, - ManifestConfig, - MetadataPlugin, - Sorting, - SpecialFields, -) -from azul.plugins.metadata.anvil.bundle import ( - AnvilBundle, -) -from azul.plugins.metadata.anvil.indexer.transform import ( - ActivityTransformer, - BaseTransformer, - BiosampleTransformer, - BundleTransformer, - DatasetTransformer, - DonorTransformer, - FileTransformer, -) -from azul.plugins.metadata.anvil.schema import ( - anvil_schema, -) -from azul.plugins.metadata.anvil.service.aggregation import ( - AnvilAggregationStage, - AnvilSummaryAggregationStage, -) -from azul.plugins.metadata.anvil.service.filter import ( - AnvilFilterStage, -) -from azul.plugins.metadata.anvil.service.response import ( - AnvilSearchResponseStage, - AnvilSummaryResponseStage, -) -from azul.service.manifest_service import ( - ManifestFormat, -) -from azul.types import ( - AnyMutableJSON, - JSON, - MutableJSON, - MutableJSONs, -) - - -class Plugin(MetadataPlugin[AnvilBundle]): - - @property - def 
exposed_indices(self) -> dict[EntityType, Sorting]: - return dict( - activities=Sorting(field_name='activities.activity_id'), - biosamples=Sorting(field_name='biosamples.biosample_id'), - bundles=Sorting(field_name=self.special_fields.bundle_uuid), - datasets=Sorting(field_name='datasets.dataset_id'), - donors=Sorting(field_name='donors.donor_id'), - files=Sorting(field_name='files.file_id'), - ) - - @property - def manifest_formats(self) -> Sequence[ManifestFormat]: - return [ - ManifestFormat.compact, - ManifestFormat.terra_pfb, - *iif(config.enable_replicas, [ - ManifestFormat.verbatim_jsonl, - ManifestFormat.verbatim_pfb - ]) - ] - - def transformer_types(self) -> Iterable[type[BaseTransformer]]: - return ( - ActivityTransformer, - BiosampleTransformer, - BundleTransformer, - DatasetTransformer, - DonorTransformer, - FileTransformer, - ) - - def transformers(self, - bundle: AnvilBundle, - *, - delete: bool - ) -> Iterable[BaseTransformer]: - return [ - transformer_cls(bundle=bundle, deleted=delete) - for transformer_cls in self.transformer_types() - ] - - def mapping(self, index_name: IndexName) -> MutableJSON: - mapping = super().mapping(index_name) - if index_name.doc_type in (DocumentType.contribution, DocumentType.aggregate): - def range_mapping(name: str, path: str) -> MutableJSON: - return { - name: { - 'path_match': path, - 'mapping': self.range_mapping - } - } - - mapping['dynamic_templates'].extend([ - range_mapping('biosample_age_range', 'contents.biosamples.donor_age_at_collection'), - range_mapping('diagnosis_age_range', 'contents.diagnoses.diagnosis_age'), - range_mapping('diagnosis_onset_age_range', 'contents.diagnoses.diagnosis_onset_age') - ]) - return mapping - - @property - def _field_mapping(self) -> MetadataPlugin._FieldMapping: - common_fields = [ - 'document_id', - 'source_datarepo_row_ids' - ] - return { - 'entity_id': 'entryId', - 'bundles': { - # These field paths have a brittle coupling that must be - # maintained to the field lookups in `self.manifest_config`. - 'uuid': self.special_fields.bundle_uuid, - 'version': self.special_fields.bundle_version - }, - 'sources': { - # These field paths have a brittle coupling that must be - # maintained to the field lookups in `self.manifest_config`. - 'id': self.special_fields.source_id, - 'spec': self.special_fields.source_spec - }, - 'contents': { - 'datasets': { - f: f'datasets.{f}' for f in [ - *common_fields, - 'dataset_id', - 'consent_group', - 'data_use_permission', - 'owner', - 'principal_investigator', - 'registered_identifier', - 'title', - 'data_modality', - # This field path has a brittle coupling that must be - # maintained to the field lookup in - # `self.manifest_config`. 
- 'duos_id', - ] - }, - 'donors': { - f: f'donors.{f}' for f in [ - *common_fields, - 'donor_id', - 'organism_type', - 'phenotypic_sex', - 'reported_ethnicity', - 'genetic_ancestry', - ] - }, - 'diagnoses': { - f: f'diagnoses.{f}' for f in [ - *common_fields, - 'diagnosis_id', - 'disease', - 'diagnosis_age_unit', - 'diagnosis_age', - 'onset_age_unit', - 'onset_age', - 'phenotype', - 'phenopacket' - ] - }, - 'biosamples': { - f: f'biosamples.{f}' for f in [ - *common_fields, - 'biosample_id', - 'anatomical_site', - 'apriori_cell_type', - 'biosample_type', - 'disease', - 'donor_age_at_collection_unit', - 'donor_age_at_collection', - ] - }, - 'activities': { - f: f'activities.{f}' for f in [ - *common_fields, - 'activity_id', - # This field path has a brittle coupling that must be - # maintained to the field lookup in - # `self.manifest_config`. - 'activity_table', - 'activity_type', - 'assay_type', - 'data_modality', - 'reference_assembly', - ] - }, - 'files': { - **{ - f: f'files.{f}' for f in [ - *common_fields, - 'file_id', - 'data_modality', - 'file_format', - 'file_size', - 'file_md5sum', - 'reference_assembly', - 'file_name', - 'is_supplementary', - # Not in schema - 'crc32', - 'sha256', - 'drs_uri', - ] - }, - # These field names are hard-coded in the implementation of - # the repository service/controller. Also, these field paths - # have a brittle coupling that must be maintained to the - # field lookups in `self.manifest_config`. - **{ - # Not in schema - 'version': 'fileVersion', - 'uuid': 'fileId', - } - } - } - } - - @property - def special_fields(self) -> SpecialFields: - return SpecialFields(source_id='source_id', - source_spec='source_spec', - bundle_uuid='bundle_uuid', - bundle_version='bundle_version', - root_entity_id='datasets.dataset_id') - - @property - def root_entity_type(self) -> str: - return 'datasets' - - @property - def facets(self) -> Sequence[str]: - return [ - *super().facets, - 'activities.activity_type', - 'activities.assay_type', - 'activities.data_modality', - 'biosamples.anatomical_site', - 'biosamples.biosample_type', - 'biosamples.disease', - 'diagnoses.disease', - 'diagnoses.phenotype', - 'diagnoses.phenopacket', - 'datasets.consent_group', - 'datasets.data_use_permission', - 'datasets.registered_identifier', - 'datasets.title', - 'donors.organism_type', - 'donors.phenotypic_sex', - 'donors.reported_ethnicity', - 'files.data_modality', - 'files.file_format', - 'files.reference_assembly', - 'files.is_supplementary', - ] - - @property - def manifest_config(self) -> ManifestConfig: - result = defaultdict(dict) - - # Note that there is a brittle coupling that must be maintained between - # the fields listed here and those used in `self._field_mapping`. - fields_to_omit_from_manifest = [ - ('contents', 'activities', 'activity_table'), - # We omit the `duos_id` field from manifests since there is only one - # DUOS bundle per dataset, and that bundle only contributes to outer - # entities of the `datasets` type, not to entities of the other - # types, such as files, which the manifest is generated from. - ('contents', 'datasets', 'duos_id'), - ('contents', 'files', 'uuid'), - ('contents', 'files', 'version'), - ] - - # Furthermore, renamed values should match the field's path in a - # response hit from the `/index/files` endpoint. 
- fields_to_rename_in_manifest = { - ('bundles', 'uuid'): 'bundles.bundle_uuid', - ('bundles', 'version'): 'bundles.bundle_version', - ('sources', 'id'): 'sources.source_id', - ('sources', 'spec'): 'sources.source_spec', - } - - def recurse(mapping: MetadataPlugin._FieldMapping, path: FieldPath): - for path_element, name_or_type in mapping.items(): - new_path = (*path, path_element) - if isinstance(name_or_type, dict): - recurse(name_or_type, new_path) - elif isinstance(name_or_type, str): - if new_path == ('entity_id',): - pass - elif new_path in fields_to_omit_from_manifest: - result[path][path_element] = None - fields_to_omit_from_manifest.remove(new_path) - elif new_path in fields_to_rename_in_manifest: - result[path][path_element] = fields_to_rename_in_manifest.pop(new_path) - else: - result[path][path_element] = name_or_type - else: - assert False, (path, path_element, name_or_type) - - recurse(self._field_mapping, ()) - assert len(fields_to_omit_from_manifest) == 0, fields_to_omit_from_manifest - assert len(fields_to_rename_in_manifest) == 0, fields_to_rename_in_manifest - # The file URL is synthesized from the `uuid` and `version` fields. - # Above, we already configured these two fields to be omitted from the - # manifest since they are not informative to the user. - result[('contents', 'files')]['file_url'] = 'files.azul_file_url' - return result - - primary_keys_by_table = { - table['name']: one(table['primaryKey']) - for table in anvil_schema['tables'] - } - - foreign_keys_by_table = { - table['name']: [ - (r['to']['table'], r['from']['column']) - for r in anvil_schema['relationships'] - if r['from']['table'] == table['name'] - ] - for table in anvil_schema['tables'] - } - - def verbatim_pfb_entity_id(self, replica: JSON) -> str: - replica_type = replica['replica_type'] - try: - primary_key = self.primary_keys_by_table[replica_type] - except KeyError: - if replica_type == 'duos_dataset_registration': - return replica['contents']['duos_id'] - else: - return super().verbatim_pfb_entity_id(replica) - else: - return replica['contents'][primary_key] - - def verbatim_pfb_relations(self, replica: JSON) -> list[tuple[str, str]]: - table_name, contents = replica['replica_type'], replica['contents'] - try: - foreign_keys = self.foreign_keys_by_table[table_name] - except KeyError: - if table_name == 'duos_dataset_registration': - return [('anvil_dataset', contents['dataset_id'])] - else: - return super().verbatim_pfb_relations(replica) - else: - return [ - (foreign_table_name, foreign_key) - for (foreign_table_name, foreign_key_column) in foreign_keys - # AnVIL foreign keys may be either scalars (e.g. `anvil_diagnosis.donor_id`) - # or arrays (e.g. `anvil_activity.used_file_id`). Scalar foreign keys may be - # null; we should never observe null values in array columns thanks to - # BigQuery's type semantics: - # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array_nulls - for foreign_key in always_iterable(contents[foreign_key_column]) - ] - - def verbatim_pfb_links(self, replica_type: str) -> MutableJSONs: - return ( - [ - { - 'dst': 'anvil_dataset', - 'name': '', - 'multiplicity': 'ONE_TO_ONE' - } - ] - if replica_type == 'duos_dataset_registration' else - [ - { - 'dst': r['to']['table'], - 'name': r['name'], - # Each link is between a foreign key and a primary key. - # Primary keys are unique within their own table, but - # multiple rows in other tables can reference them. 
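-                    # For example, many `anvil_biosample` rows may reference
-                    # the same `anvil_dataset` row via `part_of_dataset_id`.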
- 'multiplicity': 'MANY_TO_ONE', - } - for r in anvil_schema['relationships'] - if r['from']['table'] == replica_type - ] - ) - - def verbatim_pfb_schema(self, replicas: list[JSON]) -> list[JSON]: - table_schemas_by_name = { - schema['name']: schema - for schema in anvil_schema['tables'] - } - non_schema_replicas = [ - r for r in replicas - if r['replica_type'] not in table_schemas_by_name - ] - # For tables not described by the AnVIL schema, fall back to building - # their PFB schema dynamically from the shapes of the replicas - entity_schemas = super().verbatim_pfb_schema(non_schema_replicas) - # For the rest, use the AnVIL schema as the basis of the PFB schema - for table_name, table_schema in table_schemas_by_name.items(): - field_schemas = [ - self._pfb_schema_from_anvil_column(table_name=table_name, - column_name='datarepo_row_id', - anvil_datatype='string', - is_optional=False) - ] - if table_name == 'anvil_file': - field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name, - column_name='drs_uri', - anvil_datatype='string')) - for column_schema in table_schema['columns']: - field_schemas.append( - self._pfb_schema_from_anvil_column(table_name=table_name, - column_name=column_schema['name'], - anvil_datatype=column_schema['datatype'], - is_array=column_schema['array_of'], - is_optional=not column_schema['required']) - ) - - field_schemas.sort(key=itemgetter('name')) - entity_schemas.append({ - 'name': table_name, - 'type': 'record', - 'fields': field_schemas - }) - return entity_schemas - - def _pfb_schema_from_anvil_column(self, - *, - table_name: str, - column_name: str, - anvil_datatype: str, - is_array: bool = False, - is_optional: bool = True, - ) -> AnyMutableJSON: - _anvil_to_pfb_types = { - 'boolean': 'boolean', - 'float': 'double', - 'integer': 'long', - 'string': 'string', - 'fileref': 'string' - } - type_ = _anvil_to_pfb_types[anvil_datatype] - if is_optional: - type_ = ['null', type_] - if is_array: - type_ = { - 'type': 'array', - 'items': type_ - } - return { - 'name': column_name, - 'namespace': table_name, - 'type': type_, - } - - def document_slice(self, entity_type: str) -> DocumentSlice | None: - return None - - @property - def summary_response_stage(self) -> 'type[AnvilSummaryResponseStage]': - return AnvilSummaryResponseStage - - @property - def search_response_stage(self) -> 'type[AnvilSearchResponseStage]': - return AnvilSearchResponseStage - - @property - def summary_aggregation_stage(self) -> 'type[AnvilSummaryAggregationStage]': - return AnvilSummaryAggregationStage - - @property - def aggregation_stage(self) -> 'type[AnvilAggregationStage]': - return AnvilAggregationStage - - @property - def filter_stage(self) -> 'type[AnvilFilterStage]': - return AnvilFilterStage - - @property - def file_class(self) -> type[File]: - return AnvilFile - - -@frozen(kw_only=True) -class AnvilFile(File): - #: MD5 hash of the file's contents - md5: str - - @classmethod - def from_hit(cls, hit: JSON) -> Self: - return cls(uuid=hit['document_id'], - version=hit['version'], - name=hit['file_name'], - size=hit['file_size'], - drs_uri=hit['drs_uri'], - md5=hit['file_md5sum']) - - @property - def digest(self) -> Digest: - return Digest(value=self.md5, type='md5') diff --git a/src/azul/plugins/metadata/anvil/bundle.py b/src/azul/plugins/metadata/anvil/bundle.py deleted file mode 100644 index 7eedfab6b2..0000000000 --- a/src/azul/plugins/metadata/anvil/bundle.py +++ /dev/null @@ -1,121 +0,0 @@ -from abc import ( - ABCMeta, -) -from collections import ( - 
defaultdict, -) -from itertools import ( - chain, -) -from typing import ( - Mapping, - Self, -) - -import attrs - -from azul.attrs import ( - SerializableAttrs, - serializable, -) -from azul.collections import ( - aset, - none_safe_apply, -) -from azul.indexer import ( - Bundle, - SourcedBundleFQID, -) -from azul.indexer.document import ( - EntityReference, - EntityType, -) -from azul.types import ( - MutableJSON, -) - -# AnVIL snapshots do not use UUIDs for primary/foreign keys. This type alias -# helps us distinguish these keys from the document UUIDs, which are drawn from -# the `datarepo_row_id` column. Note that entities from different tables may -# have the same key, so `KeyReference` should be used when mixing keys from -# different entity types. -Key = str - - -@attrs.frozen(kw_only=True) -class KeyReference(SerializableAttrs): - key: Key - entity_type: EntityType - - -def ref_set_field(): - return serializable(attrs.field(), - from_json=lambda x: frozenset(map(EntityReference.parse, x)), - to_json=lambda x: sorted(map(str, x))) - - -@attrs.frozen(kw_only=True, order=False) -class Link[REF: EntityReference | KeyReference](SerializableAttrs): - inputs: frozenset[REF] = ref_set_field() - activity: REF | None = None - outputs: frozenset[REF] = ref_set_field() - - @property - def all_entities(self) -> frozenset[REF]: - return self.inputs | self.outputs | aset(self.activity) - - @classmethod - def group_by_activity(cls, links: set[Self]): - """ - Merge links that share the same (non-null) activity. - """ - groups_by_activity: Mapping[KeyReference, set[Self]] = defaultdict(set) - for link in links: - if link.activity is not None: - groups_by_activity[link.activity].add(link) - for activity, group in groups_by_activity.items(): - if len(group) > 1: - links -= group - merged_link = cls(inputs=frozenset.union(*[link.inputs for link in group]), - activity=activity, - outputs=frozenset.union(*[link.outputs for link in group])) - links.add(merged_link) - - def __lt__(self, other: Self) -> bool: - return min(self.inputs) < min(other.inputs) - - -class EntityLink(Link[EntityReference]): - pass - - -class KeyLink(Link[KeyReference]): - - def to_entity_link(self, - entities_by_key: Mapping[KeyReference, EntityReference] - ) -> EntityLink: - lookup = entities_by_key.__getitem__ - return EntityLink(inputs=frozenset(map(lookup, self.inputs)), - activity=none_safe_apply(lookup, self.activity), - outputs=frozenset(map(lookup, self.outputs))) - - -@attrs.define(kw_only=True) -class AnvilBundle[BUNDLE_FQID: SourcedBundleFQID](Bundle[BUNDLE_FQID], - metaclass=ABCMeta): - # The `entity_type` attribute of these keys contains the entities' BigQuery - # table name (e.g. `anvil_sequencingactivity`), not the entity type used for - # the contributions (e.g. `activities`). The metadata plugin converts from - # the former to the latter during transformation. 
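-    # The conversion is implemented by `BaseTransformer._convert_entity_type`
-    # in `azul.plugins.metadata.anvil.indexer.transform`.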
- entities: dict[EntityReference, MutableJSON] = attrs.field(factory=dict) - links: set[EntityLink] = serializable( - attrs.field(factory=set), - from_json=lambda x: set(EntityLink.from_json(v) for v in x), - to_json=lambda x: [v.to_json() for v in sorted(x)] - ) - orphans: dict[EntityReference, MutableJSON] = attrs.field(factory=dict) - - def reject_joiner(self): - # We can skip the `links` attribute because the only strings it contains - # are UUIDs and table names - self._reject_joiner(chain(self.entities.values(), self.orphans.values())) diff --git a/src/azul/plugins/metadata/anvil/indexer/__init__.py b/src/azul/plugins/metadata/anvil/indexer/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/azul/plugins/metadata/anvil/indexer/aggregate.py b/src/azul/plugins/metadata/anvil/indexer/aggregate.py deleted file mode 100644 index 8c9ce9bad7..0000000000 --- a/src/azul/plugins/metadata/anvil/indexer/aggregate.py +++ /dev/null @@ -1,74 +0,0 @@ -from operator import ( - itemgetter, -) -from typing import ( - Any, -) - -from azul.collections import ( - compose_keys, - none_safe_tuple_key, -) -from azul.indexer.aggregate import ( - Accumulator, - DistinctAccumulator, - GroupingAggregator, - SetOfDictAccumulator, - SimpleAggregator, - SumAccumulator, -) -from azul.types import ( - JSON, -) - - -class ActivityAggregator(SimpleAggregator): - pass - - -class BiosampleAggregator(SimpleAggregator): - - def _accumulator(self, field: str) -> Accumulator | None: - if field == 'donor_age_at_collection': - return SetOfDictAccumulator(max_size=100, - key=compose_keys(none_safe_tuple_key(none_last=True), - itemgetter('lte', 'gte'))) - else: - return super()._accumulator(field) - - -class DatasetAggregator(SimpleAggregator): - pass - - -class DiagnosisAggregator(SimpleAggregator): - - def _accumulator(self, field: str) -> Accumulator | None: - if field in ('diagnosis_age', 'onset_age'): - return SetOfDictAccumulator(max_size=100, - key=compose_keys(none_safe_tuple_key(none_last=True), - itemgetter('lte', 'gte'))) - else: - return super()._accumulator(field) - - -class DonorAggregator(SimpleAggregator): - pass - - -class FileAggregator(GroupingAggregator): - - def _transform_entity(self, entity: JSON) -> JSON: - return super()._transform_entity(entity) | { - 'file_size': (entity['document_id'], entity['file_size']), - 'count': (entity['document_id'], 1) - } - - def _group_keys(self, entity) -> tuple[Any, ...]: - return entity['file_format'], - - def _accumulator(self, field: str) -> Accumulator | None: - if field in ('count', 'file_size'): - return DistinctAccumulator(SumAccumulator()) - else: - return super()._accumulator(field) diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py deleted file mode 100644 index 2e2f26b6f6..0000000000 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ /dev/null @@ -1,681 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections import ( - ChainMap, - defaultdict, -) -from functools import ( - cached_property, - partial, -) -from itertools import ( - chain, -) -import logging -from operator import ( - attrgetter, -) -from typing import ( - AbstractSet, - Callable, - Collection, - Iterable, - Self, -) -from uuid import ( - UUID, -) - -import attr -from more_itertools import ( - one, -) - -from azul import ( - cache, - config, -) -from azul.collections import ( - deep_dict_merge, -) -from azul.indexer import ( - BundleFQID, - BundlePartition, -) -from 
azul.indexer.aggregate import ( - EntityAggregator, -) -from azul.indexer.document import ( - Contribution, - EntityID, - EntityReference, - EntityType, - Replica, -) -from azul.indexer.field import ( - FieldTypes, - null_bool, - null_int, - null_str, - pass_thru_int, - pass_thru_json, -) -from azul.indexer.transform import ( - ReplicaTransformer, - Transformer, -) -from azul.plugins.metadata.anvil.bundle import ( - AnvilBundle, - EntityLink, -) -from azul.plugins.metadata.anvil.indexer.aggregate import ( - ActivityAggregator, - BiosampleAggregator, - DatasetAggregator, - DiagnosisAggregator, - DonorAggregator, - FileAggregator, -) -from azul.strings import ( - pluralize, -) -from azul.types import ( - JSON, - MutableJSON, - MutableJSONs, -) - -log = logging.getLogger(__name__) - -EntityRefsByType = dict[EntityType, set[EntityReference]] - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True) -class LinkedEntities: - origin: EntityReference - ancestors: EntityRefsByType - descendants: EntityRefsByType - - def __getitem__(self, item: EntityType) -> set[EntityReference]: - return self.ancestors[item] | self.descendants[item] - - def __iter__(self) -> Iterable[EntityReference]: - for entities in self.ancestors.values(): - yield from entities - for entities in self.descendants.values(): - yield from entities - - @classmethod - def from_links(cls, - origin: EntityReference, - links: Collection[EntityLink] - ) -> Self: - return cls(origin=origin, - ancestors=cls._search(origin, links, from_='outputs', to='inputs'), - descendants=cls._search(origin, links, from_='inputs', to='outputs')) - - @classmethod - def _search(cls, - entity_ref: EntityReference, - links: Collection[EntityLink], - entities: EntityRefsByType | None = None, - *, - from_: str, - to: str - ) -> EntityRefsByType: - entities = defaultdict(set) if entities is None else entities - if entity_ref.entity_type.endswith('activity'): - follow = [one(link for link in links if entity_ref == link.activity)] - else: - follow = [link for link in links if entity_ref in getattr(link, from_)] - for link in follow: - for relative in [link.activity, *getattr(link, to)]: - if relative is not None: - if relative != entity_ref and relative.entity_id not in entities[relative.entity_type]: - entities[relative.entity_type].add(relative) - cls._search(relative, links, entities, from_=from_, to=to) - return entities - - -@attr.s(frozen=True, kw_only=True, auto_attribs=True) -class BaseTransformer(Transformer, metaclass=ABCMeta): - bundle: AnvilBundle - - @classmethod - def field_types(cls) -> FieldTypes: - return { - 'activities': cls._activity_types(), - 'biosamples': cls._biosample_types(), - 'datasets': cls._dataset_types(), - 'diagnoses': cls._diagnosis_types(), - 'donors': cls._donor_types(), - 'files': cls._aggregate_file_types(), - } - - @classmethod - def aggregator(cls, entity_type) -> EntityAggregator: - if entity_type == 'activities': - agg_cls = ActivityAggregator - elif entity_type == 'biosamples': - agg_cls = BiosampleAggregator - elif entity_type == 'datasets': - agg_cls = DatasetAggregator - elif entity_type == 'diagnoses': - agg_cls = DiagnosisAggregator - elif entity_type == 'donors': - agg_cls = DonorAggregator - elif entity_type == 'files': - agg_cls = FileAggregator - else: - assert False, entity_type - return agg_cls(cls.entity_type(), entity_type) - - def estimate(self, partition: BundlePartition) -> int: - # Orphans are not considered when deciding whether to partition the - # bundle, but if the bundle is partitioned then each 
partition will only - # emit replicas for the orphans that it contains - return sum(map(partial(self._contains, partition), self.bundle.entities)) - - def transform(self, - partition: BundlePartition - ) -> Iterable[Contribution | Replica]: - for entity in self._list_entities(): - if self._contains(partition, entity): - yield from self._transform(entity) - - def _list_entities(self) -> Iterable[EntityReference]: - return self.bundle.entities - - @abstractmethod - def _transform(self, - entity: EntityReference - ) -> Iterable[Contribution | Replica]: - raise NotImplementedError - - def _replica_contents(self, entity: EntityReference) -> JSON: - return ChainMap(self.bundle.entities, self.bundle.orphans)[entity] - - def _convert_entity_type(self, entity_type: str) -> str: - assert entity_type == 'bundle' or entity_type.startswith('anvil_'), entity_type - if entity_type == 'anvil_diagnosis': - # Irregular plural form - return 'diagnoses' - elif entity_type.endswith('activity'): - # Polymorphic. Could be `anvil_sequencingactivity`, - # `anvil_assayactivity`, `anvil_activity`, etc - return 'activities' - else: - return pluralize(entity_type.removeprefix('anvil_')) - - def _contains(self, - partition: BundlePartition, - entity: EntityReference - ) -> bool: - return ( - self._convert_entity_type(entity.entity_type) == self.entity_type() - and partition.contains(UUID(entity.entity_id)) - ) - - @cached_property - def _entities_by_type(self) -> dict[EntityType, set[EntityReference]]: - entries = defaultdict(set) - for e in self.bundle.entities: - entries[e.entity_type].add(e) - return entries - - def _linked_entities(self, entity: EntityReference) -> LinkedEntities: - return LinkedEntities.from_links(entity, self.bundle.links) - - @classmethod - def _entity_types(cls) -> FieldTypes: - return { - 'document_id': null_str, - 'source_datarepo_row_ids': [null_str] - } - - @classmethod - def _activity_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'activity_id': null_str, - 'activity_table': null_str, - 'activity_type': null_str, - 'assay_type': null_str, - 'data_modality': null_str, - 'reference_assembly': [null_str], - } - - @classmethod - def _biosample_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'biosample_id': null_str, - 'anatomical_site': null_str, - 'apriori_cell_type': [null_str], - 'biosample_type': null_str, - 'disease': null_str, - 'donor_age_at_collection_unit': null_str, - 'donor_age_at_collection': pass_thru_json, - } - - @classmethod - def _dataset_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'dataset_id': null_str, - 'consent_group': [null_str], - 'data_use_permission': [null_str], - 'owner': [null_str], - 'principal_investigator': [null_str], - 'registered_identifier': [null_str], - 'title': null_str, - 'data_modality': [null_str], - } - - @classmethod - def _diagnosis_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'diagnosis_id': null_str, - 'disease': [null_str], - 'diagnosis_age_unit': null_str, - 'diagnosis_age': pass_thru_json, - 'onset_age_unit': null_str, - 'onset_age': pass_thru_json, - 'phenotype': [null_str], - 'phenopacket': [null_str] - } - - @classmethod - def _donor_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'donor_id': null_str, - 'organism_type': null_str, - 'phenotypic_sex': null_str, - 'reported_ethnicity': null_str, - 'genetic_ancestry': [null_str], - } - - @classmethod - def _file_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'file_id': null_str, - 
'data_modality': [null_str], - 'file_format': null_str, - 'file_size': null_int, - 'file_md5sum': null_str, - 'reference_assembly': [null_str], - 'file_name': null_str, - 'is_supplementary': null_bool, - # Not in schema - 'version': null_str, - # FIXME: Redundant file fields for AnVIL are no longer needed - # https://github.com/DataBiosphere/azul/issues/7005 - 'uuid': null_str, - 'size': null_int, - 'name': null_str, - 'crc32': null_str, - 'sha256': null_str, - 'drs_uri': null_str - } - - @classmethod - def _aggregate_file_types(cls) -> FieldTypes: - return { - **cls._file_types(), - 'count': pass_thru_int # Added by FileAggregator, never null - } - - def _range(self, entity: EntityReference, *field_prefixes: str) -> MutableJSON: - metadata = self.bundle.entities[entity] - - def get_bound(field_name: str) -> float | None: - val = metadata[field_name] - return None if val is None else float(val) - - return { - field_prefix: { - 'gte': get_bound(field_prefix + '_lower_bound'), - 'lte': get_bound(field_prefix + '_upper_bound') - } - for field_prefix in field_prefixes - } - - def _entity(self, - entity: EntityReference, - field_types: FieldTypes, - **additional_fields - ) -> MutableJSON: - metadata = self.bundle.entities[entity] - field_values = ChainMap(metadata, - {'document_id': entity.entity_id}, - additional_fields) - return { - field: field_values[field] - for field in field_types - } - - def _entities(self, - factory: Callable[[EntityReference], MutableJSON], - entities: Iterable[EntityReference], - ) -> MutableJSONs: - return [ - factory(entity) - for entity in sorted(entities, key=attrgetter('entity_id')) - ] - - def _activity(self, activity: EntityReference) -> MutableJSON: - metadata = self.bundle.entities[activity] - field_types = self._activity_types() - common_fields = { - 'activity_table': activity.entity_type, - 'activity_id': metadata[f'{activity.entity_type.removeprefix("anvil_")}_id'] - } - # Activities are unique in that they may not contain every field defined - # in their field types due to polymorphism, so we need to pad the field - # values with nulls.
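-        # For example, `anvil_sequencingactivity` rows have no
-        # `reference_assembly` column, so for those activities this
-        # list-valued field is padded to `[None]` below.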
- union_fields = { - field_name: [None] if isinstance(field_type, list) else None - for field_name, field_type in field_types.items() - if field_name not in common_fields - } - activity = self._entity(activity, - self._activity_types(), - **common_fields, - **union_fields) - - return activity - - def _biosample(self, biosample: EntityReference) -> MutableJSON: - return self._entity(biosample, - self._biosample_types(), - **self._range(biosample, 'donor_age_at_collection')) - - def _dataset(self, dataset: EntityReference) -> MutableJSON: - return self._entity(dataset, self._dataset_types()) - - def _diagnosis(self, diagnosis: EntityReference) -> MutableJSON: - return self._entity(diagnosis, - self._diagnosis_types(), - **self._range(diagnosis, 'diagnosis_age', 'onset_age')) - - def _donor(self, donor: EntityReference) -> MutableJSON: - return self._entity(donor, self._donor_types()) - - def _file(self, file: EntityReference) -> MutableJSON: - metadata = self.bundle.entities[file] - return self._entity(file, - self._file_types(), - size=metadata['file_size'], - name=metadata['file_name'], - uuid=file.entity_id) - - def _only_dataset(self) -> EntityReference: - try: - return one(self._entities_by_type['anvil_dataset']) - except ValueError: - return one(o for o in self.bundle.orphans if o.entity_type == 'anvil_dataset') - - @cached_property - def _activity_polymorphic_types(self) -> AbstractSet[str]: - from azul.plugins.metadata.anvil import ( - anvil_schema, - ) - return { - table['name'] - for table in anvil_schema['tables'] - if table['name'].endswith('activity') - } - - @classmethod - def inner_entity_id(cls, entity_type: EntityType, entity: JSON) -> EntityID: - return entity['document_id'] - - @classmethod - def reconcile_inner_entities(cls, - entity_type: EntityType, - *, - this: tuple[JSON, BundleFQID], - that: tuple[JSON, BundleFQID] - ) -> tuple[JSON, BundleFQID]: - this_entity, this_bundle = this - that_entity, that_bundle = that - # All AnVIL bundles use a fixed known version - assert this_bundle.version == that_bundle.version, (this, that) - if this_entity.keys() == that_entity.keys(): - return this - else: - assert entity_type == 'datasets', (entity_type, this, that) - expected_keys = cls._complete_dataset_keys() - # There will be one contribution for a DUOS stub, and many redundant - # contributions (one per non-DUOS bundle) for the dataset metadata - # from BigQuery. Once the stub has been merged with a single main - # contribution to consolidate all expected fields, we can disregard - # the other contributions as usual. - if this_entity.keys() == expected_keys: - return this - elif that_entity.keys() == expected_keys: - return that - else: - assert this_entity.keys() < expected_keys, this - assert that_entity.keys() < expected_keys, that - merged = deep_dict_merge(this_entity, that_entity) - assert merged.keys() == expected_keys, (this, that) - # We can safely discard that_bundle because only the version is - # used by the caller, and we know the versions are equal. 
- return merged, this_bundle - - @classmethod - @cache - def _complete_dataset_keys(cls) -> AbstractSet[str]: - return cls.field_types()['datasets'].keys() - - -class SingletonTransformer(BaseTransformer, metaclass=ABCMeta): - - def _transform(self, entity: EntityReference) -> Iterable[Contribution]: - contents = dict( - activities=self._entities(self._activity, chain.from_iterable( - self._entities_by_type[activity_type] - for activity_type in self._activity_polymorphic_types - )), - biosamples=self._entities(self._biosample, self._entities_by_type['anvil_biosample']), - datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, self._entities_by_type['anvil_diagnosis']), - donors=self._entities(self._donor, self._entities_by_type['anvil_donor']), - files=self._entities(self._file, self._entities_by_type['anvil_file']) - ) - yield self._contribution(contents, entity.entity_id) - - @classmethod - def field_types(cls) -> FieldTypes: - return deep_dict_merge( - super().field_types(), - {'datasets': cls._duos_types()} - ) - - @classmethod - def _duos_types(cls) -> FieldTypes: - return { - 'document_id': null_str, - 'description': null_str, - 'duos_id': null_str, - } - - def _duos(self, dataset: EntityReference) -> MutableJSON: - return self._entity(dataset, self._duos_types()) - - def _is_duos(self, dataset: EntityReference) -> bool: - try: - contents = self.bundle.entities[dataset] - except KeyError: - return False - else: - return 'duos_id' in contents - - def _dataset(self, dataset: EntityReference) -> MutableJSON: - if self._is_duos(dataset): - return self._duos(dataset) - else: - return super()._dataset(dataset) - - def _replica_type(self, entity: EntityReference) -> str: - if entity.entity_type == 'anvil_dataset' and self._is_duos(entity): - return 'duos_dataset_registration' - else: - return super()._replica_type(entity) - - def _list_entities(self) -> Iterable[EntityReference]: - # Suppress contributions for bundles that only contain orphans - if self.bundle.entities: - yield self._singleton() - - @abstractmethod - def _singleton(self) -> EntityReference: - raise NotImplementedError - - -class ActivityTransformer(BaseTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'activities' - - def _transform(self, entity: EntityReference) -> Iterable[Contribution]: - linked = self._linked_entities(entity) - contents = dict( - activities=[self._activity(entity)], - biosamples=self._entities(self._biosample, linked['anvil_biosample']), - datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), - donors=self._entities(self._donor, linked['anvil_donor']), - files=self._entities(self._file, linked['anvil_file']) - ) - yield self._contribution(contents, entity.entity_id) - - -class BiosampleTransformer(BaseTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'biosamples' - - def _transform(self, entity: EntityReference) -> Iterable[Contribution]: - linked = self._linked_entities(entity) - contents = dict( - activities=self._entities(self._activity, chain.from_iterable( - linked[activity_type] - for activity_type in self._activity_polymorphic_types - )), - biosamples=[self._biosample(entity)], - datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), - donors=self._entities(self._donor, linked['anvil_donor']), - files=self._entities(self._file, linked['anvil_file']), - ) - yield self._contribution(contents, 
entity.entity_id) - - class BundleTransformer(SingletonTransformer): - - @classmethod - def entity_type(cls) -> EntityType: - return 'bundles' - - def _singleton(self) -> EntityReference: - return EntityReference(entity_type='bundle', - entity_id=self.bundle.uuid) - - def transform(self, - partition: BundlePartition - ) -> Iterable[Contribution | Replica]: - yield from super().transform(partition) - if config.enable_replicas: - # The file transformer only emits replicas for entities that are - # linked to at least one file. This excludes all orphans, and a - # small number of linked entities, usually from primary bundles - # that don't include any files. Some of the replicas we emit here will be - # redundant with those emitted by the file transformer, but these - # will be consolidated by the index service before they are written - # to Elasticsearch. - dataset = self._only_dataset() - for entity in chain(self.bundle.orphans, self.bundle.entities): - if partition.contains(UUID(entity.entity_id)): - yield self._replica(entity, file_hub=None, root_hub=dataset.entity_id) - - class DatasetTransformer(SingletonTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'datasets' - - def _singleton(self) -> EntityReference: - return self._only_dataset() - - class DonorTransformer(BaseTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'donors' - - def _transform(self, entity: EntityReference) -> Iterable[Contribution]: - linked = self._linked_entities(entity) - contents = dict( - activities=self._entities(self._activity, chain.from_iterable( - linked[activity_type] - for activity_type in self._activity_polymorphic_types - )), - biosamples=self._entities(self._biosample, linked['anvil_biosample']), - datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), - donors=[self._donor(entity)], - files=self._entities(self._file, linked['anvil_file']), - ) - yield self._contribution(contents, entity.entity_id) - - class FileTransformer(BaseTransformer, ReplicaTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'files' - - @classmethod - def hot_entity_types(cls) -> dict[EntityType, EntityType]: - return { - 'anvil_dataset': 'datasets', - } - - def _transform(self, - entity: EntityReference - ) -> Iterable[Contribution | Replica]: - linked = self._linked_entities(entity) - dataset = self._only_dataset() - contents = dict( - activities=self._entities(self._activity, chain.from_iterable( - linked[activity_type] - for activity_type in self._activity_polymorphic_types - )), - biosamples=self._entities(self._biosample, linked['anvil_biosample']), - datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), - donors=self._entities(self._donor, linked['anvil_donor']), - files=[self._file(entity)], - ) - file_id = entity.entity_id - yield self._contribution(contents, file_id) - if config.enable_replicas: - dataset_id = dataset.entity_id - yield self._replica(entity, file_hub=file_id, root_hub=dataset_id) - for linked_entity in linked: - yield self._replica( - linked_entity, - file_hub=None if linked_entity.entity_type in self.hot_entity_types() else file_id, - root_hub=dataset_id - ) diff --git a/src/azul/plugins/metadata/anvil/schema.py b/src/azul/plugins/metadata/anvil/schema.py deleted file mode 100644 index b0cea08384..0000000000 --- a/src/azul/plugins/metadata/anvil/schema.py +++ /dev/null @@ -1,898 +0,0 @@ -""" -Auto-generated by 
scripts/download_anvil_schema.py - -Do not edit this file directly. - -Instead, update the script and run `make anvil_schema` -""" - -anvil_schema = { - 'version': 5, - 'tables': [ - { - 'name': 'anvil_activity', - 'columns': [ - { - 'name': 'activity_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'activity_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'used_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'generated_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'used_biosample_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'activity_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_alignmentactivity', - 'columns': [ - { - 'name': 'alignmentactivity_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'activity_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'data_modality', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'generated_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'used_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'reference_assembly', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'alignmentactivity_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_antibody', - 'columns': [ - { - 'name': 'antibody_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'target', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'antibody_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_assayactivity', - 'columns': [ - { - 'name': 'assayactivity_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'activity_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'antibody_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'assay_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'data_modality', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'generated_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'used_biosample_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'assayactivity_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_biosample', - 'columns': [ - { - 'name': 'biosample_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, 
- { - 'name': 'anatomical_site', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'apriori_cell_type', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'biosample_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'disease', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'donor_age_at_collection_unit', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'donor_age_at_collection_lower_bound', - 'datatype': 'float', - 'array_of': False, - 'required': False - }, - { - 'name': 'donor_age_at_collection_upper_bound', - 'datatype': 'float', - 'array_of': False, - 'required': False - }, - { - 'name': 'donor_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'part_of_dataset_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'biosample_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_dataset', - 'columns': [ - { - 'name': 'dataset_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'consent_group', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'data_use_permission', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'owner', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'principal_investigator', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'registered_identifier', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'title', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'data_modality', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'dataset_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_diagnosis', - 'columns': [ - { - 'name': 'diagnosis_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'donor_id', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'disease', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'diagnosis_age_unit', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'diagnosis_age_lower_bound', - 'datatype': 'float', - 'array_of': False, - 'required': False - }, - { - 'name': 'diagnosis_age_upper_bound', - 'datatype': 'float', - 'array_of': False, - 'required': False - }, - { - 'name': 'onset_age_unit', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'onset_age_lower_bound', - 'datatype': 'float', - 'array_of': False, - 'required': False - }, - { - 'name': 'onset_age_upper_bound', - 'datatype': 'float', - 'array_of': False, - 'required': False - }, - { - 'name': 'phenotype', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'phenopacket', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 
'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'diagnosis_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_donor', - 'columns': [ - { - 'name': 'donor_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'organism_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'part_of_dataset_id', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'phenotypic_sex', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'reported_ethnicity', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'genetic_ancestry', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'donor_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_file', - 'columns': [ - { - 'name': 'file_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'data_modality', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'file_format', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'file_size', - 'datatype': 'integer', - 'array_of': False, - 'required': False - }, - { - 'name': 'file_md5sum', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'reference_assembly', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'file_name', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'file_ref', - 'datatype': 'fileref', - 'array_of': False, - 'required': False - }, - { - 'name': 'is_supplementary', - 'datatype': 'boolean', - 'array_of': False, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'file_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_project', - 'columns': [ - { - 'name': 'project_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'funded_by', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'generated_dataset_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'principal_investigator', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'title', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'registered_identifier', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'project_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_sequencingactivity', - 'columns': [ - { - 'name': 'sequencingactivity_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'activity_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'assay_type', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 
'data_modality', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'generated_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'used_biosample_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'sequencingactivity_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - }, - { - 'name': 'anvil_variantcallingactivity', - 'columns': [ - { - 'name': 'variantcallingactivity_id', - 'datatype': 'string', - 'array_of': False, - 'required': True - }, - { - 'name': 'activity_type', - 'datatype': 'string', - 'array_of': False, - 'required': False - }, - { - 'name': 'used_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'generated_file_id', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'reference_assembly', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'data_modality', - 'datatype': 'string', - 'array_of': True, - 'required': False - }, - { - 'name': 'source_datarepo_row_ids', - 'datatype': 'string', - 'array_of': True, - 'required': False - } - ], - 'primaryKey': [ - 'variantcallingactivity_id' - ], - 'partitionMode': 'none', - 'datePartitionOptions': None, - 'intPartitionOptions': None - } - ], - 'relationships': [ - { - 'name': 'from_activity.used_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_activity', - 'column': 'used_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - }, - { - 'name': 'from_activity.generated_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_activity', - 'column': 'generated_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - }, - { - 'name': 'from_activity.used_biosample_id_to_biosample.biosample_id', - 'from': { - 'table': 'anvil_activity', - 'column': 'used_biosample_id' - }, - 'to': { - 'table': 'anvil_biosample', - 'column': 'biosample_id' - } - }, - { - 'name': 'from_alignmentactivity.used_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_alignmentactivity', - 'column': 'used_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - }, - { - 'name': 'from_alignmentactivity.generated_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_alignmentactivity', - 'column': 'generated_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - }, - { - 'name': 'from_assayactivity.antibody_id_to_antibody.antibody_id', - 'from': { - 'table': 'anvil_assayactivity', - 'column': 'antibody_id' - }, - 'to': { - 'table': 'anvil_antibody', - 'column': 'antibody_id' - } - }, - { - 'name': 'from_assayactivity.generated_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_assayactivity', - 'column': 'generated_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - }, - { - 'name': 'from_assayactivity.used_biosample_id_to_biosample.biosample_id', - 'from': { - 'table': 'anvil_assayactivity', - 'column': 'used_biosample_id' - }, - 'to': { - 'table': 'anvil_biosample', - 'column': 'biosample_id' - } - }, - { - 'name': 'from_biosample.donor_id_to_donor.donor_id', - 'from': { - 'table': 'anvil_biosample', - 'column': 'donor_id' - }, - 'to': { - 'table': 'anvil_donor', - 'column': 'donor_id' - } - }, - { - 'name': 'from_biosample.part_of_dataset_id_to_dataset.dataset_id', - 
'from': { - 'table': 'anvil_biosample', - 'column': 'part_of_dataset_id' - }, - 'to': { - 'table': 'anvil_dataset', - 'column': 'dataset_id' - } - }, - { - 'name': 'from_donor.part_of_dataset_id_to_dataset.dataset_id', - 'from': { - 'table': 'anvil_donor', - 'column': 'part_of_dataset_id' - }, - 'to': { - 'table': 'anvil_dataset', - 'column': 'dataset_id' - } - }, - { - 'name': 'from_diagnosis.donor_id_to_donor.donor_id', - 'from': { - 'table': 'anvil_diagnosis', - 'column': 'donor_id' - }, - 'to': { - 'table': 'anvil_donor', - 'column': 'donor_id' - } - }, - { - 'name': 'from_project.generated_dataset_id_to_dataset.dataset_id', - 'from': { - 'table': 'anvil_project', - 'column': 'generated_dataset_id' - }, - 'to': { - 'table': 'anvil_dataset', - 'column': 'dataset_id' - } - }, - { - 'name': 'from_sequencingactivity.generated_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_sequencingactivity', - 'column': 'generated_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - }, - { - 'name': 'from_sequencingactivity.used_biosample_id_to_biosample.biosample_id', - 'from': { - 'table': 'anvil_sequencingactivity', - 'column': 'used_biosample_id' - }, - 'to': { - 'table': 'anvil_biosample', - 'column': 'biosample_id' - } - }, - { - 'name': 'from_variantcallingactivity.used_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_variantcallingactivity', - 'column': 'used_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - }, - { - 'name': 'from_variantcallingactivity.generated_file_id_to_file.file_id', - 'from': { - 'table': 'anvil_variantcallingactivity', - 'column': 'generated_file_id' - }, - 'to': { - 'table': 'anvil_file', - 'column': 'file_id' - } - } - ] -} diff --git a/src/azul/plugins/metadata/anvil/service/__init__.py b/src/azul/plugins/metadata/anvil/service/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/azul/plugins/metadata/anvil/service/aggregation.py b/src/azul/plugins/metadata/anvil/service/aggregation.py deleted file mode 100644 index 02452feb51..0000000000 --- a/src/azul/plugins/metadata/anvil/service/aggregation.py +++ /dev/null @@ -1,39 +0,0 @@ -from opensearchpy import ( - Search, -) -from opensearchpy.helpers.aggs import ( - Agg, -) - -from azul.plugins import ( - FieldPath, -) -from azul.service.elasticsearch_service import ( - AggregationStage, -) -from azul.types import ( - MutableJSON, -) - - -class AnvilAggregationStage(AggregationStage): - - def _prepare_aggregation(self, *, facet: str, facet_path: FieldPath) -> Agg: - agg = super()._prepare_aggregation(facet=facet, facet_path=facet_path) - return agg - - -class AnvilSummaryAggregationStage(AnvilAggregationStage): - - def prepare_request(self, request: Search) -> Search: - request = super().prepare_request(request) - request = request.extra(size=0) - if self.entity_type == 'files': - request.aggs.metric('totalFileSize', - 'sum', - field='contents.files.size_') - return request - - def process_response(self, response: MutableJSON) -> MutableJSON: - response = super().process_response(response) - return response['aggregations'] diff --git a/src/azul/plugins/metadata/anvil/service/filter.py b/src/azul/plugins/metadata/anvil/service/filter.py deleted file mode 100644 index 0b21e2c62b..0000000000 --- a/src/azul/plugins/metadata/anvil/service/filter.py +++ /dev/null @@ -1,10 +0,0 @@ -from azul.service.elasticsearch_service import ( - FilterStage, -) - - -class AnvilFilterStage(FilterStage): - - @property - def _limit_access(self) -> bool: 
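-        # Access filtering applies to every entity type except datasets: - # dataset hits remain listable, presumably so that clients can - # discover access-controlled datasets and then request access to them.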
- return self.entity_type != 'datasets' diff --git a/src/azul/plugins/metadata/anvil/service/response.py b/src/azul/plugins/metadata/anvil/service/response.py deleted file mode 100644 index 23ac4deb43..0000000000 --- a/src/azul/plugins/metadata/anvil/service/response.py +++ /dev/null @@ -1,251 +0,0 @@ -from functools import ( - partial, -) -from typing import ( - Mapping, - Sequence, -) - -from more_itertools import ( - one, -) - -from azul import ( - cached_property, -) -from azul.json import ( - copy_json, -) -from azul.plugins import ( - SpecialFields, -) -from azul.service.elasticsearch_service import ( - ResponseTriple, -) -from azul.service.repository_service import ( - SearchResponseStage, - SummaryResponseStage, -) -from azul.types import ( - JSON, - MutableJSON, -) - - -class AnvilSummaryResponseStage(SummaryResponseStage): - - @property - def aggs_by_authority(self) -> Mapping[str, Sequence[str]]: - return { - 'activities': [ - 'activities.activity_type' - ], - 'biosamples': [ - 'biosamples.anatomical_site' - ], - 'datasets': [ - 'datasets.title' - ], - 'donors': [ - 'donors.organism_type', - 'diagnoses.disease', - 'diagnoses.phenotype' - ], - 'files': [ - 'files.file_format', - 'totalFileSize' - ] - } - - def process_response(self, response: JSON) -> JSON: - def doc_count(field): - return response[field]['doc_count'] - - def bucket_count(field, bucket_key): - return [ - { - 'count': bucket['doc_count'], - bucket_key: bucket['key'] - } - for bucket in response[field]['myTerms']['buckets'] - ] - - return { - 'activityCount': doc_count('activities.activity_type'), - 'activityTypes': bucket_count('activities.activity_type', 'type'), - 'biosampleCount': doc_count('biosamples.anatomical_site'), - 'datasetCount': doc_count('datasets.title'), - 'donorCount': doc_count('donors.organism_type'), - 'donorDiagnosisDiseases': bucket_count('diagnoses.disease', 'disease'), - 'donorDiagnosisPhenotypes': bucket_count('diagnoses.phenotype', 'phenotype'), - 'donorSpecies': bucket_count('donors.organism_type', 'species'), - 'fileCount': doc_count('files.file_format'), - 'fileFormats': bucket_count('files.file_format', 'format'), - 'totalFileSize': response['totalFileSize']['value'], - } - - -class AnvilSearchResponseStage(SearchResponseStage): - - def process_response(self, response: ResponseTriple) -> MutableJSON: - hits, pagination, aggs = response - return dict( - hits=list(map(self._make_hit, hits)), - pagination=pagination, - termFacets=dict(zip(aggs.keys(), map(self._make_terms, aggs.values()))) - ) - - def _make_terms(self, agg: JSON) -> JSON: - # FIXME: much of this is duplicated from - # azul.plugins.metadata.hca.service.response.SearchResponseFactory - # https://github.com/DataBiosphere/azul/issues/4135 - def choose_entry(_term): - if 'key_as_string' in _term: - return _term['key_as_string'] - elif (term_key := _term['key']) is None: - return None - elif isinstance(term_key, bool): - return str(term_key).lower() - elif isinstance(term_key, dict): - return term_key - else: - return str(term_key) - - terms = [ - { - 'term': choose_entry(bucket), - 'count': bucket['doc_count'] - } - for bucket in agg['myTerms']['buckets'] - ] - - # Add the untagged_count to the existing termObj for a None value, - # or add a new one - untagged_count = agg['untagged']['doc_count'] - if untagged_count > 0: - for term in terms: - if term['term'] is None: - term['count'] += untagged_count - break - else: - terms.append({'term': None, 'count': untagged_count}) - - return { - 'terms': terms, - 'total': 0 if 
len(agg['myTerms']['buckets']) == 0 else agg['doc_count'], - # FIXME: Remove type from termsFacets in /index responses - # https://github.com/DataBiosphere/azul/issues/2460 - 'type': 'terms' - } - - def _make_hit(self, es_hit: JSON) -> MutableJSON: - return { - 'entryId': es_hit['entity_id'], - # Note that there is a brittle coupling that must be maintained - # between the `sources` and `bundles` field paths here and the - # renamed fields in `Plugin.manifest_config`. - 'sources': list(map(self._make_source, es_hit['sources'])), - 'bundles': list(map(self._make_bundle, es_hit['bundles'])), - **self._make_contents(es_hit['contents']) - } - - def _make_source(self, es_source: JSON) -> MutableJSON: - return { - self._special_fields.source_spec: es_source['spec'], - self._special_fields.source_id: es_source['id'] - } - - @cached_property - def _special_fields(self) -> SpecialFields: - return self.plugin.special_fields - - def _make_bundle(self, es_bundle: JSON) -> MutableJSON: - return { - self._special_fields.bundle_uuid: es_bundle['uuid'], - self._special_fields.bundle_version: es_bundle['version'] - } - - def _make_contents(self, es_contents: JSON) -> MutableJSON: - return { - inner_entity_type: ( - [self._pivotal_entity(inner_entity_type, one(inner_entities))] - if inner_entity_type == self.entity_type else - list(map(partial(self._non_pivotal_entity, inner_entity_type), inner_entities)) - ) - for inner_entity_type, inner_entities in es_contents.items() - } - - def _pivotal_entity(self, - inner_entity_type: str, - inner_entity: JSON - ) -> MutableJSON: - inner_entity = copy_json(inner_entity) - if inner_entity_type == 'files': - inner_entity['uuid'] = inner_entity['document_id'] - return inner_entity - - def _non_pivotal_entity(self, - inner_entity_type: str, - inner_entity: JSON - ) -> MutableJSON: - fields = self._non_pivotal_fields_by_entity_type[inner_entity_type] - return { - k: v - for k, v in inner_entity.items() - if k in fields - } - - @cached_property - def _non_pivotal_fields_by_entity_type(self) -> dict[str, set[str]]: - return { - 'activities': { - 'activity_type', - 'assay_type', - 'data_modality' - }, - 'biosamples': { - 'anatomical_site', - 'biosample_type', - 'disease', - 'donor_age_at_collection_unit', - 'donor_age_at_collection', - }, - 'datasets': { - 'dataset_id', - 'duos_id', - 'title' - }, - 'diagnoses': { - 'disease', - 'phenotype', - 'phenopacket', - 'onset_age_unit', - 'diagnosis_age_unit', - *( - # These fields are of high cardinality, but the number of - # aggregated inner entities per donor should be low. Since - # diagnoses do not appear in the index as outer entities, - # this is our only opportunity to display these fields. 
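- # Hence the two age fields below are included only when donors - # are the outer entity type: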
- [ - 'diagnosis_age', - 'onset_age' - ] - if self.entity_type == 'donors' else - [] - ) - }, - 'donors': { - 'organism_type', - 'phenotypic_sex', - 'reported_ethnicity', - 'genetic_ancestry' - }, - 'files': { - 'count', - 'data_modality', - 'file_format', - 'file_size', - 'is_supplementary', - 'reference_assembly' - } - } diff --git a/src/azul/plugins/metadata/hca/__init__.py b/src/azul/plugins/metadata/hca/__init__.py deleted file mode 100644 index fb665d6f4d..0000000000 --- a/src/azul/plugins/metadata/hca/__init__.py +++ /dev/null @@ -1,521 +0,0 @@ -from typing import ( - Iterable, - Self, - Sequence, - TYPE_CHECKING, -) - -from attrs import ( - frozen, -) - -from azul import ( - JSON, - R, - config, - iif, -) -from azul.digests import ( - Digest, -) -from azul.indexer.document import ( - Aggregate, - DocumentType, - EntityType, - IndexName, -) -from azul.plugins import ( - DocumentSlice, - File, - ManifestConfig, - MetadataPlugin, - Sorting, - SpecialFields, -) -from azul.plugins.metadata.hca.bundle import ( - HCABundle, -) -from azul.plugins.metadata.hca.indexer.aggregate import ( - HCAAggregate, -) -from azul.plugins.metadata.hca.indexer.transform import ( - BaseTransformer, - BundleTransformer, - CellSuspensionTransformer, - FileTransformer, - ProjectTransformer, - SampleTransformer, -) -from azul.plugins.metadata.hca.service.aggregation import ( - HCAAggregationStage, - HCASummaryAggregationStage, -) -from azul.plugins.metadata.hca.service.filter import ( - HCAFilterStage, -) -from azul.plugins.metadata.hca.service.response import ( - HCASearchResponseStage, - HCASummaryResponseStage, -) -from azul.service.manifest_service import ( - ManifestFormat, -) -from azul.types import ( - MutableJSON, -) -from humancellatlas.data.metadata import ( - api, -) - - -class Plugin(MetadataPlugin[HCABundle]): - - def transformer_types(self) -> Iterable[type[BaseTransformer]]: - return ( - FileTransformer, - CellSuspensionTransformer, - SampleTransformer, - ProjectTransformer, - BundleTransformer - ) - - def transformers(self, - bundle: HCABundle, - *, - delete: bool - ) -> Iterable[BaseTransformer]: - api_bundle = api.Bundle(uuid=bundle.uuid, - version=bundle.version, - manifest=bundle.manifest, - metadata=bundle.metadata, - links_json=bundle.links, - stitched_entity_ids=bundle.stitched) - - def transformers(): - for transformer_cls in self.transformer_types(): - if TYPE_CHECKING: # work around https://youtrack.jetbrains.com/issue/PY-44728 - transformer_cls = BaseTransformer - yield transformer_cls(bundle=bundle, api_bundle=api_bundle, deleted=delete) - - return list(transformers()) - - def aggregate_class(self) -> type[Aggregate]: - return HCAAggregate - - def mapping(self, index_name: IndexName) -> MutableJSON: - mapping = super().mapping(index_name) - if index_name.doc_type in (DocumentType.contribution, DocumentType.aggregate): - mapping['properties']['contents'] = { - 'properties': { - 'projects': { - 'properties': { - 'accessions': { - 'type': 'nested' - }, - 'tissue_atlas': { - 'type': 'nested' - } - } - } - } - } - mapping['dynamic_templates'][0:0] = [ - { - 'donor_age_range': { - 'path_match': 'contents.donors.organism_age_range', - 'mapping': self.range_mapping - } - }, - { - 'exclude_metadata_field': { - 'path_match': 'contents.metadata', - 'mapping': { - 'enabled': False - } - } - }, - { - 'exclude_metadata_field': { - 'path_match': 'contents.files.related_files', - 'mapping': { - 'enabled': False - } - } - }, - { - 'project_nested_contributors': { - 'path_match': 
'contents.projects.contributors', - 'mapping': { - 'enabled': False - } - } - }, - { - 'project_nested_publications': { - 'path_match': 'contents.projects.publications', - 'mapping': { - 'enabled': False - } - } - } - ] - return mapping - - @property - def exposed_indices(self) -> dict[EntityType, Sorting]: - return dict( - bundles=Sorting(field_name=self.special_fields.bundle_version, - descending=True, - max_page_size=100), - files=Sorting(field_name='fileName'), - projects=Sorting(field_name='projectTitle', - max_page_size=100), - samples=Sorting(field_name='sampleId') - ) - - @property - def manifest_formats(self) -> Sequence[ManifestFormat]: - return [ - ManifestFormat.compact, - ManifestFormat.terra_pfb, - ManifestFormat.curl, - *iif(config.enable_replicas, [ - ManifestFormat.verbatim_jsonl, - ManifestFormat.verbatim_pfb - ]) - ] - - @property - def _field_mapping(self) -> MetadataPlugin._FieldMapping: - # FIXME: Detect invalid values in field mapping - # https://github.com/DataBiosphere/azul/issues/3071 - return { - 'entity_id': 'entryId', - 'bundles': { - 'uuid': self.special_fields.bundle_uuid, - 'version': self.special_fields.bundle_version - }, - 'sources': { - 'id': self.special_fields.source_id, - 'spec': self.special_fields.source_spec - }, - 'cell_count': 'cellCount', - 'effective_cell_count': 'effectiveCellCount', - 'contents': { - 'dates': { - 'submission_date': 'submissionDate', - 'update_date': 'updateDate', - 'last_modified_date': 'lastModifiedDate', - 'aggregate_submission_date': 'aggregateSubmissionDate', - 'aggregate_update_date': 'aggregateUpdateDate', - 'aggregate_last_modified_date': 'aggregateLastModifiedDate' - }, - 'files': { - 'file_format': 'fileFormat', - 'name': 'fileName', - 'size': 'fileSize', - 'file_source': 'fileSource', - 'uuid': 'fileId', - 'version': 'fileVersion', - 'content_description': 'contentDescription', - 'matrix_cell_count': 'matrixCellCount', - 'is_intermediate': 'isIntermediate' - }, - 'projects': { - 'contact_names': 'contactName', - 'document_id': 'projectId', - 'institutions': 'institution', - 'laboratory': 'laboratory', - 'project_description': 'projectDescription', - 'project_short_name': 'project', - 'project_title': 'projectTitle', - 'publication_titles': 'publicationTitle', - 'accessions': 'accessions', - 'estimated_cell_count': 'projectEstimatedCellCount', - 'is_tissue_atlas_project': 'isTissueAtlasProject', - 'tissue_atlas': 'tissueAtlas', - 'bionetwork_name': 'bionetworkName', - 'data_use_restriction': 'dataUseRestriction', - 'duos_id': 'duosId' - }, - 'sequencing_protocols': { - 'instrument_manufacturer_model': 'instrumentManufacturerModel', - 'paired_end': 'pairedEnd' - }, - 'library_preparation_protocols': { - 'library_construction_approach': 'libraryConstructionApproach', - 'nucleic_acid_source': 'nucleicAcidSource' - }, - 'analysis_protocols': { - 'workflow': 'workflow' - }, - 'imaging_protocols': { - 'assay_type': 'assayType' - }, - 'donors': { - 'biological_sex': 'biologicalSex', - 'genus_species': 'genusSpecies', - 'diseases': 'donorDisease', - 'development_stage': 'developmentStage', - 'organism_age': 'organismAge', - 'organism_age_range': 'organismAgeRange', - 'donor_count': 'donorCount' - }, - 'samples': { - 'biomaterial_id': 'sampleId', - 'entity_type': 'sampleEntityType', - 'organ': 'organ', - 'organ_part': 'organPart', - 'model_organ': 'modelOrgan', - 'model_organ_part': 'modelOrganPart', - 'effective_organ': 'effectiveOrgan' - }, - 'sample_specimens': { - 'disease': 'sampleDisease' - }, - 'specimens': { - 
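# Note that specimen fields map to facet names distinct from those - # in the 'sample_specimens' entry above, so specimen-level and - # sample-level disease terms remain independently filterable. -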
'disease': 'specimenDisease', - 'organ': 'specimenOrgan', - 'organ_part': 'specimenOrganPart', - 'preservation_method': 'preservationMethod' - }, - 'cell_suspensions': { - 'selected_cell_type': 'selectedCellType' - }, - 'cell_lines': { - 'cell_line_type': 'cellLineType' - } - } - } - - @property - def special_fields(self) -> SpecialFields: - return SpecialFields(source_id='sourceId', - source_spec='sourceSpec', - bundle_uuid='bundleUuid', - bundle_version='bundleVersion', - root_entity_id='projectId') - - @property - def root_entity_type(self) -> str: - return 'projects' - - @property - def facets(self) -> Sequence[str]: - return [ - *super().facets, - 'organ', - 'organPart', - 'modelOrgan', - 'modelOrganPart', - 'effectiveOrgan', - 'specimenOrgan', - 'specimenOrganPart', - 'sampleEntityType', - 'libraryConstructionApproach', - 'nucleicAcidSource', - 'genusSpecies', - 'organismAge', - 'biologicalSex', - 'sampleDisease', - 'specimenDisease', - 'donorDisease', - 'developmentStage', - 'instrumentManufacturerModel', - 'pairedEnd', - 'workflow', - 'assayType', - 'project', - 'fileFormat', - 'fileSource', - 'isIntermediate', - 'contentDescription', - 'laboratory', - 'preservationMethod', - 'projectTitle', - 'cellLineType', - 'selectedCellType', - 'projectDescription', - 'institution', - 'contactName', - 'publicationTitle', - 'isTissueAtlasProject', - 'tissueAtlas', - 'bionetworkName', - 'dataUseRestriction' - ] - - @property - def manifest_config(self) -> ManifestConfig: - return { - ('sources',): { - 'id': 'source_id', - 'spec': 'source_spec', - }, - ('bundles',): { - 'uuid': 'bundle_uuid', - 'version': 'bundle_version' - }, - ('contents', 'files'): { - 'document_id': 'file_document_id', - 'file_type': 'file_type', - 'name': 'file_name', - 'file_format': 'file_format', - 'read_index': 'read_index', - 'size': 'file_size', - 'uuid': 'file_uuid', - 'version': 'file_version', - 'crc32c': 'file_crc32c', - 'sha256': 'file_sha256', - 'content-type': 'file_content_type', - 'drs_uri': 'file_drs_uri', - 'file_url': 'file_url' - }, - ('contents', 'cell_suspensions'): { - 'document_id': 'cell_suspension.provenance.document_id', - 'biomaterial_id': 'cell_suspension.biomaterial_core.biomaterial_id', - 'total_estimated_cells': 'cell_suspension.estimated_cell_count', - 'selected_cell_type': 'cell_suspension.selected_cell_type' - }, - ('contents', 'sequencing_processes'): { - 'document_id': 'sequencing_process.provenance.document_id' - }, - ('contents', 'sequencing_protocols'): { - 'instrument_manufacturer_model': 'sequencing_protocol.instrument_manufacturer_model', - 'paired_end': 'sequencing_protocol.paired_end' - }, - ('contents', 'library_preparation_protocols'): { - 'library_construction_approach': 'library_preparation_protocol.library_construction_approach', - 'nucleic_acid_source': 'library_preparation_protocol.nucleic_acid_source' - }, - ('contents', 'projects'): { - 'document_id': 'project.provenance.document_id', - 'institutions': 'project.contributors.institution', - 'laboratory': 'project.contributors.laboratory', - 'project_short_name': 'project.project_core.project_short_name', - 'project_title': 'project.project_core.project_title', - 'estimated_cell_count': 'project.estimated_cell_count' - }, - ('contents', 'specimens'): { - 'document_id': 'specimen_from_organism.provenance.document_id', - 'disease': 'specimen_from_organism.diseases', - 'organ': 'specimen_from_organism.organ', - 'organ_part': 'specimen_from_organism.organ_part', - 'preservation_method': 
'specimen_from_organism.preservation_storage.preservation_method' - }, - ('contents', 'donors'): { - 'biological_sex': 'donor_organism.sex', - 'biomaterial_id': 'donor_organism.biomaterial_core.biomaterial_id', - 'document_id': 'donor_organism.provenance.document_id', - 'genus_species': 'donor_organism.genus_species', - 'development_stage': 'donor_organism.development_stage', - 'diseases': 'donor_organism.diseases', - 'organism_age': 'donor_organism.organism_age' - }, - ('contents', 'cell_lines'): { - 'document_id': 'cell_line.provenance.document_id', - 'biomaterial_id': 'cell_line.biomaterial_core.biomaterial_id' - }, - ('contents', 'organoids'): { - 'document_id': 'organoid.provenance.document_id', - 'biomaterial_id': 'organoid.biomaterial_core.biomaterial_id', - 'model_organ': 'organoid.model_organ', - 'model_organ_part': 'organoid.model_organ_part' - }, - ('contents', 'samples'): { - 'entity_type': '_entity_type', - 'document_id': 'sample.provenance.document_id', - 'biomaterial_id': 'sample.biomaterial_core.biomaterial_id' - }, - ('contents', 'sequencing_inputs'): { - 'document_id': 'sequencing_input.provenance.document_id', - 'biomaterial_id': 'sequencing_input.biomaterial_core.biomaterial_id', - 'sequencing_input_type': 'sequencing_input_type' - } - } - - def document_slice(self, entity_type: str) -> DocumentSlice | None: - if entity_type in ('files', 'bundles'): - return None - else: - return DocumentSlice(excludes=['bundles']) - - @property - def summary_response_stage(self) -> type[HCASummaryResponseStage]: - return HCASummaryResponseStage - - @property - def search_response_stage(self) -> type[HCASearchResponseStage]: - return HCASearchResponseStage - - @property - def summary_aggregation_stage(self) -> type[HCASummaryAggregationStage]: - return HCASummaryAggregationStage - - @property - def aggregation_stage(self) -> type[HCAAggregationStage]: - return HCAAggregationStage - - @property - def filter_stage(self) -> type[HCAFilterStage]: - return HCAFilterStage - - @property - def file_class(self) -> type[File]: - return HCAFile - - -@frozen(kw_only=True) -class HCAFile(File): - #: Various checksums of the file's content - sha256: str - # crc32c is required by the HCA API, but we allow it to be None for mirroring - crc32c: str | None = None - sha1: str | None = None - s3_etag: str | None = None - - @classmethod - def from_hit(cls, hit: JSON) -> Self: - return cls(uuid=hit['uuid'], - version=hit['version'], - name=hit['name'], - size=hit['size'], - drs_uri=hit['drs_uri'], - content_type=hit['content-type'], - sha256=hit['sha256'], - crc32c=hit['crc32c'], - sha1=hit.get('sha1'), - s3_etag=hit.get('s3_etag')) - - @classmethod - def from_descriptor(cls, - descriptor: JSON, - *, - uuid: str, - name: str, - drs_uri: str | None) -> Self: - content_type = descriptor['content_type'] - # FIXME: Obsolete MIME parameter in file content types - # https://github.com/HumanCellAtlas/dcp2/issues/73 - parameter_suffix = '; dcp-type=data' - if content_type.endswith(parameter_suffix): - content_type = content_type.removesuffix(parameter_suffix) - else: - # FIXME: Re-enable assertion, potentially in a weakened form - # https://github.com/DataBiosphere/azul/issues/7244 - assert True or ';' not in content_type, R( - 'Unexpected MIME parameter in content type', content_type) - return cls(uuid=uuid, - name=name, - version=descriptor['file_version'], - size=descriptor['size'], - content_type=content_type, - sha256=descriptor['sha256'], - crc32c=descriptor['crc32c'], - sha1=descriptor.get('sha1'), - 
s3_etag=descriptor.get('s3_etag'), - drs_uri=drs_uri) - - @property - def digest(self) -> Digest: - return Digest(value=self.sha256, type='sha256') diff --git a/src/azul/plugins/metadata/hca/bundle.py b/src/azul/plugins/metadata/hca/bundle.py deleted file mode 100644 index 12d829fc71..0000000000 --- a/src/azul/plugins/metadata/hca/bundle.py +++ /dev/null @@ -1,49 +0,0 @@ -from abc import ( - ABCMeta, -) -import logging - -import attrs - -from azul.attrs import ( - serializable, -) -from azul.indexer import ( - Bundle, - SourcedBundleFQID, -) -from azul.types import ( - MutableJSON, -) - -log = logging.getLogger(__name__) - - -@attrs.define(kw_only=True) -class HCABundle[BUNDLE_FQID: SourcedBundleFQID](Bundle[BUNDLE_FQID], - metaclass=ABCMeta): - manifest: MutableJSON - """ - Each item of the `manifest` attribute's value has this shape: - { - 'content-type': 'application/json; dcp-type="metadata/biomaterial"', - 'crc32c': 'fd239631', - 'indexed': True, - 'name': 'cell_suspension_0.json', - 's3_etag': 'aa31c093cc816edb1f3a42e577872ec6', - 'sha1': 'f413a9a7923dee616309e4f40752859195798a5d', - 'sha256': 'ea4c9ed9e53a3aa2ca4b7dffcacb6bbe9108a460e8e15d2b3d5e8e5261fb043e', - 'size': 1366, - 'uuid': '0136ebb4-1317-42a0-8826-502fae25c29f', - 'version': '2019-05-16T162155.020000Z' - } - """ - metadata: MutableJSON - links: MutableJSON - stitched: set[str] = serializable(attrs.field(factory=set), - to_json=sorted, - from_json=set) - - def reject_joiner(self): - # We can skip the `stitched` attribute because it only contains UUIDs - self._reject_joiner([self.manifest, self.metadata, self.links]) diff --git a/src/azul/plugins/metadata/hca/indexer/__init__.py b/src/azul/plugins/metadata/hca/indexer/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/azul/plugins/metadata/hca/indexer/aggregate.py b/src/azul/plugins/metadata/hca/indexer/aggregate.py deleted file mode 100644 index cf139c127c..0000000000 --- a/src/azul/plugins/metadata/hca/indexer/aggregate.py +++ /dev/null @@ -1,258 +0,0 @@ -from operator import ( - itemgetter, -) -from typing import ( - Any, - cast, -) - -from more_itertools import ( - one, -) - -from azul import ( - cached_property, -) -from azul.collections import ( - compose_keys, - none_safe_itemgetter, - none_safe_key, - none_safe_tuple_key, -) -from azul.indexer.aggregate import ( - Accumulator, - DictAccumulator, - DistinctAccumulator, - FrequencySetAccumulator, - GroupingAggregator, - MaxAccumulator, - MinAccumulator, - SetAccumulator, - SetOfDictAccumulator, - SimpleAggregator, - SingleValueAccumulator, - SumAccumulator, - UniqueValueCountAccumulator, -) -from azul.indexer.document import ( - Aggregate, -) -from azul.indexer.field import ( - FieldTypes, - null_int, -) -from azul.types import ( - JSON, -) - - -class HCAAggregate(Aggregate): - - @cached_property - def cell_count(self) -> int: - return sum(cs['total_estimated_cells'] - for cs in self.contents['cell_suspensions'] - if cs['total_estimated_cells'] is not None) - - @cached_property - def effective_cell_count(self) -> int: - if self.entity.entity_type == 'projects': - project = cast(JSON, one(self.contents['projects'])) - project_cells = project['estimated_cell_count'] - if project_cells is None: - return self.cell_count - else: - return project_cells - else: - return self.cell_count - - @classmethod - def field_types(cls, field_types: FieldTypes) -> FieldTypes: - return dict(super().field_types(field_types), - cell_count=null_int, - effective_cell_count=null_int) - - def to_json(self) -> 
JSON: - return dict(super().to_json(), - cell_count=self.cell_count, - effective_cell_count=self.effective_cell_count) - - -class FileAggregator(GroupingAggregator): - - def _transform_entity(self, entity: JSON) -> JSON: - fqid = entity['uuid'], entity['version'] - return dict(size=(fqid, entity['size']), - file_format=entity['file_format'], - file_source=entity['file_source'], - is_intermediate=entity['is_intermediate'], - count=(fqid, 1), - content_description=entity['content_description'], - matrix_cell_count=(fqid, entity.get('matrix_cell_count'))) - - def _group_keys(self, entity) -> tuple[Any, ...]: - return ( - frozenset(entity['content_description']), - entity['file_format'], - entity['is_intermediate'] - ) - - def _accumulator(self, field) -> Accumulator | None: - if field in ('content_description', 'file_format', 'is_intermediate'): - return SingleValueAccumulator() - elif field == 'file_source': - return SetAccumulator(max_size=100) - elif field in ('size', 'count', 'matrix_cell_count'): - return DistinctAccumulator(SumAccumulator()) - else: - return super()._accumulator(field) - - def _default_accumulator(self) -> Accumulator | None: - return None - - -class SampleAggregator(SimpleAggregator): - pass - - -class SpecimenAggregator(SimpleAggregator): - pass - - -class CellSuspensionAggregator(GroupingAggregator): - cell_count_fields = frozenset([ - 'total_estimated_cells', - 'total_estimated_cells_redundant' - ]) - - def _transform_entity(self, entity: JSON) -> JSON: - return entity | { - field: (entity['document_id'], entity[field]) - for field in self.cell_count_fields - } - - def _group_keys(self, entity) -> tuple[Any, ...]: - return frozenset(entity['organ']), - - def _accumulator(self, field) -> Accumulator | None: - if field in self.cell_count_fields: - return DistinctAccumulator(SumAccumulator()) - else: - return super()._accumulator(field) - - -class CellLineAggregator(SimpleAggregator): - pass - - -class DonorOrganismAggregator(SimpleAggregator): - - def _transform_entity(self, entity: JSON) -> JSON: - return { - **entity, - 'donor_count': entity['biomaterial_id'] - } - - def _accumulator(self, field) -> Accumulator | None: - if field == 'organism_age_range': - return SetAccumulator(max_size=100) - elif field == 'organism_age': - return SetOfDictAccumulator(max_size=100, - key=compose_keys(none_safe_tuple_key(none_last=True), - none_safe_itemgetter('value', 'unit'))) - elif field == 'donor_count': - return UniqueValueCountAccumulator() - elif field == 'document_id': - # If any donor IDs are missing from the aggregate, those donors will - # be omitted during the verbatim handover. Donors are a "hot" entity - # type, and we can't track their hubs in replica documents, so we - # rely on the inner entity IDs instead. 
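- # The SetAccumulator below therefore retains up to 100 distinct - # donor document IDs per aggregate.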
- # - # FIXME: Enforce that hot entity types are completely aggregated - # https://github.com/DataBiosphere/azul/issues/6793 - return SetAccumulator(max_size=100) - else: - return super()._accumulator(field) - - -class OrganoidAggregator(SimpleAggregator): - pass - - -class ProjectAggregator(SimpleAggregator): - - def _accumulator(self, field) -> Accumulator | None: - if field == 'document_id': - return SetAccumulator(max_size=100) - elif field in ('project_description', - 'contact_names', - 'contributors', - 'publications'): - return None - elif field == 'estimated_cell_count': - return MaxAccumulator() - elif field == 'accessions': - return SetOfDictAccumulator(key=compose_keys(none_safe_key(), - none_safe_itemgetter('accession'))) - elif field == 'tissue_atlas': - return SetOfDictAccumulator(key=compose_keys(none_safe_key(), - none_safe_itemgetter('atlas'))) - else: - return super()._accumulator(field) - - -class ProtocolAggregator(SimpleAggregator): - - def _accumulator(self, field) -> Accumulator | None: - if field == 'assay_type': - return FrequencySetAccumulator(max_size=100) - elif field == 'document_id': - # If any protocol IDs are missing from the aggregate, those - # protocols may be omitted during the verbatim handover. Some - # protocols are "hot" entity types, and we can't track their hubs in - # replicas, so we rely on the inner entity IDs instead. - # - # FIXME: Enforce that hot entity types are completely aggregated - # https://github.com/DataBiosphere/azul/issues/6793 - return SetAccumulator(max_size=100) - else: - return super()._accumulator(field) - - def _default_accumulator(self) -> Accumulator | None: - return SetAccumulator() - - -class SequencingInputAggregator(SimpleAggregator): - pass - - -class SequencingProcessAggregator(SimpleAggregator): - - def _default_accumulator(self) -> Accumulator | None: - return SetAccumulator(max_size=10) - - -class MatricesAggregator(SimpleAggregator): - - def _accumulator(self, field) -> Accumulator | None: - if field == 'document_id': - return None - elif field == 'file': - return DictAccumulator(max_size=100, key=itemgetter('uuid')) - else: - return SetAccumulator() - - -class DateAggregator(SimpleAggregator): - - def _accumulator(self, field) -> Accumulator | None: - if field == 'document_id': - return None - elif field in ('submission_date', 'aggregate_submission_date'): - return MinAccumulator() - elif field in ('update_date', 'aggregate_update_date'): - return MaxAccumulator() - elif field in ('last_modified_date', 'aggregate_last_modified_date'): - return MaxAccumulator() - else: - return super()._accumulator(field) diff --git a/src/azul/plugins/metadata/hca/indexer/transform.py b/src/azul/plugins/metadata/hca/indexer/transform.py deleted file mode 100644 index 0e4c3e2197..0000000000 --- a/src/azul/plugins/metadata/hca/indexer/transform.py +++ /dev/null @@ -1,1772 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections import ( - Counter, - defaultdict, -) -from datetime import ( - datetime, -) -from enum import ( - Enum, -) -from itertools import ( - chain, -) -import logging -import re -from typing import ( - Callable, - Generic, - Iterable, - Iterator, - Mapping, - Protocol, - Self, - TypeVar, - get_args, -) -from uuid import ( - UUID, - uuid5, -) - -import attr -from more_itertools import ( - ilen, - one, - only, -) - -from azul import ( - R, - cached_property, - config, -) -from azul.collections import ( - OrderedSet, - none_safe_key, -) -from azul.enums import ( - auto, -) -from azul.indexer import 
( - BundleFQID, - BundlePartition, -) -from azul.indexer.aggregate import ( - EntityAggregator, - SimpleAggregator, -) -from azul.indexer.document import ( - Contribution, - EntityID, - EntityReference, - EntityType, - Replica, -) -from azul.indexer.field import ( - ClosedRange, - FieldTypes, - Nested, - Nullable, - NullableString, - PassThrough, - null_bool, - null_datetime, - null_int, - null_str, - pass_thru_float, - pass_thru_int, - pass_thru_json, -) -from azul.indexer.transform import ( - ReplicaTransformer, - Transformer, -) -from azul.iterators import ( - generable, -) -from azul.openapi import ( - schema, -) -from azul.plugins.metadata.hca.bundle import ( - HCABundle, -) -from azul.plugins.metadata.hca.indexer.aggregate import ( - CellLineAggregator, - CellSuspensionAggregator, - DateAggregator, - DonorOrganismAggregator, - FileAggregator, - MatricesAggregator, - OrganoidAggregator, - ProjectAggregator, - ProtocolAggregator, - SampleAggregator, - SequencingInputAggregator, - SequencingProcessAggregator, - SpecimenAggregator, -) -from azul.plugins.metadata.hca.service.contributor_matrices import ( - parse_strata, -) -from azul.time import ( - format_dcp2_datetime, - parse_dcp2_version, -) -from azul.types import ( - JSON, - JSONs, - MutableJSON, -) -from humancellatlas.data.metadata import ( - api, -) - -log = logging.getLogger(__name__) - -Sample = api.CellLine | api.Organoid | api.SpecimenFromOrganism -sample_types = api.CellLine, api.Organoid, api.SpecimenFromOrganism -assert get_args(Sample) == sample_types # since we can't use * in generic types - -pass_thru_uuid4: PassThrough[api.UUID4] = PassThrough(str, es_type='keyword') - - -def _format_dcp2_datetime(d: datetime | None) -> str | None: - return None if d is None else format_dcp2_datetime(d) - - -class ValueAndUnit(Nullable[JSON, str]): - """ - The type of document fields whose values consist of a numeric quantity and - a symbolic unit, such as the age of a donor organism at collection time. - """ - - # FIXME: change the es_type for JSON to `nested` - # https://github.com/DataBiosphere/azul/issues/2621 - es_type = 'keyword' - - def to_index(self, value_unit: JSON | None) -> str: - """ - >>> a = ValueAndUnit(JSON, str) - >>> a.to_index({'value': '20', 'unit': 'year'}) - '20 year' - - >>> a.to_index({'value': '20', 'unit': None}) - '20' - - >>> a.to_index(None) - '~null' - - >>> a.to_index({}) - Traceback (most recent call last): - ... - AssertionError: R('Need dictionary with entries for `value` and `unit`') - - >>> a.to_index({'value': '1', 'unit': 'day', 'foo': 12}) - Traceback (most recent call last): - ... - AssertionError: R('Need dictionary with exactly two entries') - - >>> a.to_index({'unit': 'day'}) - Traceback (most recent call last): - ... - AssertionError: R('Need dictionary with entries for `value` and `unit`') - - >>> a.to_index({'value': '1'}) - Traceback (most recent call last): - ... - AssertionError: R('Need dictionary with entries for `value` and `unit`') - - >>> a.to_index({'value': '', 'unit': 'year'}) - Traceback (most recent call last): - ... - AssertionError: R('The `value` entry must not be empty') - - >>> a.to_index({'value': '20', 'unit': ''}) - Traceback (most recent call last): - ... - AssertionError: R('The `unit` entry must not be empty') - - >>> a.to_index({'value': None, 'unit': 'years'}) - Traceback (most recent call last): - ... - AssertionError: R('The `value` entry must not be null') - - >>> a.to_index({'value': 20, 'unit': None}) - Traceback (most recent call last): - ... 
- AssertionError: R('The `value` entry must be a string') - - >>> a.to_index({'value': '20', 'unit': True}) - Traceback (most recent call last): - ... - AssertionError: R('The `unit` entry must be a string') - - >>> a.to_index({'value': '20 ', 'unit': None}) - Traceback (most recent call last): - ... - AssertionError: R('The `value` entry must not contain spaces') - - >>> a.to_index({'value': '20', 'unit': 'years '}) - Traceback (most recent call last): - ... - AssertionError: R('The `unit` entry must not contain spaces') - """ - if value_unit is None: - return NullableString.null_string - else: - try: - value, unit = value_unit['value'], value_unit['unit'] - except KeyError: - assert False, R('Need dictionary with entries for `value` and `unit`') - else: - assert len(value_unit) == 2, R('Need dictionary with exactly two entries') - assert value != '', R('The `value` entry must not be empty') - assert unit != '', R('The `unit` entry must not be empty') - assert value is not None, R('The `value` entry must not be null') - assert type(value) is str, R('The `value` entry must be a string') - assert ' ' not in value, R('The `value` entry must not contain spaces') - if unit is None: - return value - else: - assert type(unit) is str, R('The `unit` entry must be a string') - assert ' ' not in unit, R('The `unit` entry must not contain spaces') - return f'{value} {unit}' - - def from_index(self, value: str) -> JSON | None: - """ - >>> a = ValueAndUnit(JSON, str) - >>> a.from_index('20 year') - {'value': '20', 'unit': 'year'} - - >>> a.from_index('20') - {'value': '20', 'unit': None} - - >>> a.from_index('~null') is None - True - - Although 'year' looks like a unit, we intentionally treat it like a - value because this class does not enforce any constraints on value or - unit other than it not contain spaces. - - >>> a.from_index('20  ') - Traceback (most recent call last): - ... - ValueError: Expected exactly one item in iterable, but got '', '', and perhaps more. - - >>> a.from_index(' year') - Traceback (most recent call last): - ... - AssertionError - - >>> a.from_index('1 ') - Traceback (most recent call last): - ... - AssertionError - - >>> a.from_index('') - Traceback (most recent call last): - ... - AssertionError - """ - if value == NullableString.null_string: - return None - else: - i = iter(value.split(' ')) - value = next(i) - # only() fails with more than one item left in the iterator - unit = only(i) - assert value, value - assert unit is None or unit, unit - return {'value': value, 'unit': unit} - - def to_tsv(self, value: JSON | None) -> str: - return '' if value is None else self.to_index(value) - - @property - def api_schema(self) -> JSON: - return schema.object(value=str, unit=str) - - -value_and_unit: ValueAndUnit = ValueAndUnit(JSON, str) - -accession: Nested = Nested(namespace=null_str, accession=null_str) - -tissue_atlas: Nested = Nested(atlas=null_str, version=null_str) - -age_range = ClosedRange(pass_thru_float) - - -class SubmitterCategory(Enum): - """ - The types of submitters, such as internal (submitter of DCP generated - matrices) and external (submitter of contributor generated matrices). - """ - internal = auto() - external = auto() - - -class SubmitterBase: - # These class attributes must be defined in a superclass because Enum and - # EnumMeta would get confused if they were defined in the Enum subclass.
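- # (Enum would otherwise treat these plain class attributes as - # prospective members.)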
- by_id: dict[str, 'Submitter'] = {} - by_title: dict[str, 'Submitter'] = {} - id_namespace = UUID('382415e5-67a6-49be-8f3c-aaaa707d82db') - - -class Submitter(SubmitterBase, Enum): - """ - The known submitters of data files, specifically matrix files. - """ - # A submitter's ID is derived from its slug. We hard-code it for the sake of - # documenting it. The constructor ensures the hard-coded value is correct. - - arrayexpress = ( - 'b7525d8e-8c7a-5fec-911a-323e5c3a79f7', - 'ArrayExpress', - SubmitterCategory.external - ) - contributor = ( - 'f180f1c3-9073-54a9-9bab-633008c307cc', - 'Contributor', - SubmitterCategory.external - ) - geo = ( - '21b9424e-4043-5e80-85d0-1f0449430b57', - 'GEO', - SubmitterCategory.external - ) - hca_release = ( - '656db407-02f1-547c-9840-6908c4f09ce8', - 'HCA Release', - SubmitterCategory.external - ) - scea = ( - '099feafe-ab42-5fb1-bff5-dbbe5ea61a0d', - 'SCEA', - SubmitterCategory.external - ) - scp = ( - '3d76d2d3-51f4-5b17-85c8-f3549a7ab716', - 'SCP', - SubmitterCategory.external - ) - dcp2 = ( - 'e67aaabe-93ea-564a-aa66-31bc0857b707', - 'DCP/2 Analysis', - SubmitterCategory.internal - ) - dcp2_ingest = ( - '8d59f7a5-6245-5e42-9bc0-a53dd8a10f28', - 'DCP/2 Ingest', - SubmitterCategory.internal - ) - dcp1_matrix_service = ( - 'c9efbb15-c50c-5796-8d15-35e9e1219dc5', - 'DCP/1 Matrix Service', - SubmitterCategory.internal - ) - lungmap = ( - '31ad7d2c-7262-54aa-92df-7f16418f3b84', - 'LungMAP', - SubmitterCategory.external - ) - zenodo = ( - 'bd24572b-a535-5ff8-b167-0e43d7f0d4b0', - 'Zenodo', - SubmitterCategory.external - ) - publication = ( - '210ca4c7-f6f6-5a0d-8b1c-88ab5349a8f3', - 'Publication', - SubmitterCategory.external - ) - - def __init__(self, id: str, title: str, category: SubmitterCategory): - super().__init__() - slug = self.name.replace('_', ' ') - generated_uuid = str(uuid5(self.id_namespace, slug)) - assert id == generated_uuid, (id, generated_uuid) - self.id = id - self.slug = slug - self.title = title - self.category = category - assert title not in self.by_title, title - self.by_title[title] = self - self.by_id[id] = self - - @classmethod - def for_id(cls, submitter_id: str) -> Self | None: - try: - return cls.by_id[submitter_id] - except KeyError: - return None - - @classmethod - def for_file(cls, file: api.File) -> Self | None: - if file.file_source is None: - if ( - # The DCP/2 system design specification mistakenly required that - # intermediate matrices generated by the DCP/2 Analysis do not - # carry any submitter_id: - # - # > Any intermediate matrices created during the processing are - # > described as analysis_file, but the - # > analysis_file.provenance.submitter_id property is omitted. - # - # https://github.com/HumanCellAtlas/dcp2/blob/main/docs/dcp2_system_design.rst#52dcp2-generated-matrices - # - # This heuristic attempts to retroactively assign the `dcp2` - # submitter ID to all analysis files produced by DCP/2 Analysis, - # not just intermediate matrices but also BAMs and other - # intermediate files. 
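- # In short: the file lacks an explicit submitter_id, it is an - # analysis file, and at least one of the processes that produced - # it is an analysis process.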
- file.submitter_id is None - and isinstance(file, api.AnalysisFile) - and any(isinstance(p, api.AnalysisProcess) - for p in file.from_processes.values()) - ): - self = cls.dcp2 - else: - self = cls.for_id(file.submitter_id) - else: - self = cls.by_title[file.file_source] - return self - - @classmethod - def title_for_file(cls, file: api.File) -> str | None: - self = cls.for_file(file) - return None if self is None else self.title - - @classmethod - def category_for_file(cls, file: api.File) -> SubmitterCategory | None: - self = cls.for_file(file) - if self is None: - return None - else: - return self.category - - -class Entity(Protocol): - document_id: api.UUID4 - - -class DatedEntity(Entity, Protocol): - submission_date: datetime - update_date: datetime - - -@attr.s(frozen=True, kw_only=True, auto_attribs=True) -class BaseTransformer(Transformer, metaclass=ABCMeta): - bundle: HCABundle - api_bundle: api.Bundle - - @classmethod - def aggregator(cls, entity_type: EntityType) -> EntityAggregator | None: - if entity_type == 'files': - agg_cls = FileAggregator - elif entity_type in SampleTransformer.inner_entity_types(): - agg_cls = SampleAggregator - elif entity_type == 'specimens': - agg_cls = SpecimenAggregator - elif entity_type == 'cell_suspensions': - agg_cls = CellSuspensionAggregator - elif entity_type == 'cell_lines': - agg_cls = CellLineAggregator - elif entity_type == 'donors': - agg_cls = DonorOrganismAggregator - elif entity_type == 'organoids': - agg_cls = OrganoidAggregator - elif entity_type == 'projects': - agg_cls = ProjectAggregator - elif entity_type in ( - 'analysis_protocols', - 'imaging_protocols', - 'library_preparation_protocols', - 'sequencing_protocols' - ): - agg_cls = ProtocolAggregator - elif entity_type == 'sequencing_inputs': - agg_cls = SequencingInputAggregator - elif entity_type == 'sequencing_processes': - agg_cls = SequencingProcessAggregator - elif entity_type in ('matrices', 'contributed_analyses'): - agg_cls = MatricesAggregator - elif entity_type == 'dates': - agg_cls = DateAggregator - else: - agg_cls = SimpleAggregator - return agg_cls(cls.entity_type(), entity_type) - - def _replica_contents(self, entity: EntityReference) -> JSON: - if entity == self.api_bundle.ref: - return self.bundle.links - else: - api_entity = self.api_bundle.entities[UUID(entity.entity_id)] - return api_entity.json - - def _find_ancestor_samples(self, - entity: api.LinkedEntity, - samples: dict[str, Sample] - ): - """ - Populate the `samples` argument with the sample ancestors of the given - entity. A sample is any biomaterial that is neither a cell suspension - nor an ancestor of another sample. 
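- For example, a specimen from an organism is a sample, whereas the - cell suspensions derived from it are not.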
- - :param entity: the entity whose ancestor samples should be found - - :param samples: the dictionary into which to place found ancestor - samples, by their document ID - """ - if isinstance(entity, sample_types): - samples[str(entity.document_id)] = entity - else: - for parent in entity.parents.values(): - self._find_ancestor_samples(parent, samples) - - def _visit_file(self, file): - visitor = TransformerVisitor() - file.accept(visitor) - file.ancestors(visitor) - samples: dict[str, Sample] = dict() - self._find_ancestor_samples(file, samples) - return visitor, samples - - def __dates(self, entity: DatedEntity) -> MutableJSON: - dates = (entity.submission_date, entity.update_date) - last_modified_date = max(filter(None, dates)) - return { - 'submission_date': format_dcp2_datetime(entity.submission_date), - 'update_date': _format_dcp2_datetime(entity.update_date), - 'last_modified_date': format_dcp2_datetime(last_modified_date) - } - - def __aggregate_dates(self, entities: Iterable[DatedEntity]) -> MutableJSON: - submission_dates = {entity.submission_date for entity in entities} - update_dates = {entity.update_date for entity in entities} - dates = submission_dates | update_dates - agg_last_modified_date = max(filter(None, dates), default=None) - agg_submission_date = min(submission_dates, default=None) - agg_update_date = max(filter(None, update_dates), default=None) - return { - 'aggregate_last_modified_date': _format_dcp2_datetime(agg_last_modified_date), - 'aggregate_submission_date': _format_dcp2_datetime(agg_submission_date), - 'aggregate_update_date': _format_dcp2_datetime(agg_update_date), - } - - @classmethod - def _date_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'aggregate_last_modified_date': null_datetime, - 'aggregate_submission_date': null_datetime, - 'aggregate_update_date': null_datetime, - 'submission_date': null_datetime, - 'update_date': null_datetime, - 'last_modified_date': null_datetime, - } - - def _date(self, entity: DatedEntity) -> MutableJSON: - return { - **self._entity(entity), - **self.__dates(entity), - **self.__aggregate_dates(self._dated_entities()) - } - - def _dated_entities(self) -> Iterable[DatedEntity]: - # Only containers have dated entities - return [] - - @classmethod - def _entity_types(cls) -> FieldTypes: - return { - 'document_id': null_str, - } - - def _entity(self, entity: Entity): - return { - 'document_id': str(entity.document_id), - } - - @classmethod - def _biomaterial_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'biomaterial_id': null_str, - } - - def _biomaterial(self, biomaterial: api.Biomaterial): - return { - **self._entity(biomaterial), - 'biomaterial_id': str(biomaterial.biomaterial_id), - } - - @classmethod - def _contact_types(cls) -> FieldTypes: - return { - 'contact_name': null_str, - 'corresponding_contributor': null_bool, - 'email': null_str, - 'institution': null_str, - 'laboratory': null_str, - 'project_role': null_str - } - - def _contact(self, p: api.ProjectContact): - # noinspection PyDeprecation - return { - 'contact_name': p.contact_name, - 'corresponding_contributor': p.corresponding_contributor, - 'email': p.email, - 'institution': p.institution, - 'laboratory': p.laboratory, - 'project_role': p.project_role - } - - @classmethod - def _publication_types(cls) -> FieldTypes: - return { - 'publication_title': null_str, - 'publication_url': null_str, - 'official_hca_publication': null_bool, - 'doi': null_str - } - - def _publication(self, p: api.ProjectPublication): - # 
noinspection PyDeprecation - return { - 'publication_title': p.publication_title, - 'publication_url': p.publication_url, - 'official_hca_publication': p.official_hca, - 'doi': p.doi - } - - def _accession(self, p: api.Accession): - return { - 'namespace': p.namespace, - 'accession': p.accession - } - - def _tissue_atlas(self, b: api.Bionetwork): - return { - 'atlas': b.hca_tissue_atlas, - 'version': b.hca_tissue_atlas_version - } - - @classmethod - def _project_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'project_title': null_str, - 'project_description': null_str, - 'project_short_name': null_str, - 'laboratory': [null_str], - 'institutions': [null_str], - 'contact_names': [null_str], - 'contributors': cls._contact_types(), - 'publication_titles': [null_str], - 'publications': cls._publication_types(), - 'supplementary_links': [null_str], - '_type': null_str, - 'accessions': [accession], - 'is_tissue_atlas_project': null_bool, - 'tissue_atlas': [tissue_atlas], - 'bionetwork_name': [null_str], - 'estimated_cell_count': null_int, - 'data_use_restriction': null_str, - 'duos_id': null_str - } - - def _project(self, project: api.Project) -> MutableJSON: - # Store lists of all values of each of these facets to allow facet filtering - # and term counting on the webservice - laboratories: OrderedSet[str] = OrderedSet() - institutions: OrderedSet[str] = OrderedSet() - contact_names: OrderedSet[str] = OrderedSet() - publication_titles: OrderedSet[str] = OrderedSet() - - for contributor in project.contributors: - if contributor.laboratory: - laboratories.add(contributor.laboratory) - # noinspection PyDeprecation - if contributor.contact_name: - # noinspection PyDeprecation - contact_names.add(contributor.contact_name) - if contributor.institution: - institutions.add(contributor.institution) - - for publication in project.publications: - # noinspection PyDeprecation - if publication.publication_title: - # noinspection PyDeprecation - publication_titles.add(publication.publication_title) - - return { - **self._entity(project), - 'project_title': project.project_title, - # FIXME: Omit large project fields from non-project contributions - # https://github.com/DataBiosphere/azul/issues/5346 - 'project_description': project.project_description, - 'project_short_name': project.project_short_name, - 'laboratory': list(laboratories), - 'institutions': list(institutions), - 'contact_names': list(contact_names), - 'contributors': list(map(self._contact, project.contributors)), - 'publication_titles': list(publication_titles), - 'publications': list(map(self._publication, project.publications)), - 'supplementary_links': sorted(project.supplementary_links), - '_type': 'project', - 'accessions': list(map(self._accession, project.accessions)), - 'is_tissue_atlas_project': any(bionetwork.atlas_project - for bionetwork in project.bionetworks), - 'tissue_atlas': list(map(self._tissue_atlas, project.bionetworks)), - 'bionetwork_name': sorted(bionetwork.name for bionetwork in project.bionetworks), - 'estimated_cell_count': project.estimated_cell_count, - 'data_use_restriction': project.data_use_restriction, - 'duos_id': project.duos_id - } - - @classmethod - def _specimen_types(cls) -> FieldTypes: - return { - **cls._biomaterial_types(), - 'has_input_biomaterial': null_str, - '_source': null_str, - 'disease': [null_str], - 'organ': null_str, - 'organ_part': [null_str], - 'storage_method': null_str, - 'preservation_method': null_str, - '_type': null_str - } - - def _specimen(self, specimen: 
api.SpecimenFromOrganism) -> MutableJSON: - return { - **self._biomaterial(specimen), - 'has_input_biomaterial': specimen.has_input_biomaterial, - '_source': api.schema_names[type(specimen)], - 'disease': sorted(specimen.diseases), - 'organ': specimen.organ, - 'organ_part': sorted(specimen.organ_parts), - 'storage_method': specimen.storage_method, - 'preservation_method': specimen.preservation_method, - '_type': 'specimen' - } - - cell_count_fields = [ - ('total_estimated_cells', True), - ('total_estimated_cells_redundant', False) - ] - - @classmethod - def _cell_suspension_types(cls) -> FieldTypes: - return { - **cls._biomaterial_types(), - **{field: null_int for field, _ in cls.cell_count_fields}, - 'selected_cell_type': [null_str], - 'organ': [null_str], - 'organ_part': [null_str] - } - - def _cell_suspension(self, cell_suspension: api.CellSuspension) -> MutableJSON: - organs = set() - organ_parts = set() - samples: dict[str, Sample] = dict() - self._find_ancestor_samples(cell_suspension, samples) - for sample in samples.values(): - if isinstance(sample, api.SpecimenFromOrganism): - organs.add(sample.organ) - organ_parts.update(sample.organ_parts) - elif isinstance(sample, api.CellLine): - organs.add(sample.model_organ) - organ_parts.add(None) - elif isinstance(sample, api.Organoid): - organs.add(sample.model_organ) - organ_parts.add(sample.model_organ_part) - else: - assert False - is_leaf = cell_suspension.document_id in self.api_bundle.leaf_cell_suspensions - return { - **self._biomaterial(cell_suspension), - **{ - field: cell_suspension.estimated_cell_count if is_leaf_field == is_leaf else 0 - for field, is_leaf_field in self.cell_count_fields - }, - 'selected_cell_type': sorted(cell_suspension.selected_cell_types), - 'organ': sorted(organs), - # With multiple samples it is possible to have str and None values - 'organ_part': sorted(organ_parts, key=none_safe_key(none_last=True)) - } - - @classmethod - def _cell_line_types(cls) -> FieldTypes: - return { - **cls._biomaterial_types(), - 'cell_line_type': null_str, - 'model_organ': null_str - } - - def _cell_line(self, cell_line: api.CellLine) -> MutableJSON: - # noinspection PyDeprecation - return { - **self._biomaterial(cell_line), - 'cell_line_type': cell_line.cell_line_type, - 'model_organ': cell_line.model_organ - } - - @classmethod - def _donor_types(cls) -> FieldTypes: - return { - **cls._biomaterial_types(), - 'biological_sex': null_str, - 'genus_species': [null_str], - 'development_stage': null_str, - 'diseases': [null_str], - 'organism_age': value_and_unit, - # Prevent problem due to shadow copies on numeric ranges - 'organism_age_range': age_range, - 'donor_count': null_int - } - - def _donor(self, donor: api.DonorOrganism) -> MutableJSON: - if donor.organism_age is None: - assert donor.organism_age_unit is None, R('Unit must be None if value is') - organism_age = None - else: - organism_age = { - 'value': donor.organism_age, - 'unit': donor.organism_age_unit - } - return { - **self._biomaterial(donor), - 'biological_sex': donor.sex, - 'genus_species': sorted(donor.genus_species), - 'development_stage': donor.development_stage, - 'diseases': sorted(donor.diseases), - 'organism_age': organism_age, - **( - { - 'organism_age_range': ( - donor.organism_age_in_seconds.min, - donor.organism_age_in_seconds.max - ) - } if donor.organism_age_in_seconds else { - } - ) - } - - @classmethod - def _organoid_types(cls) -> FieldTypes: - return { - **cls._biomaterial_types(), - 'model_organ': null_str, - 'model_organ_part': null_str - } - - 
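Note on the cell-count split in `_cell_suspension` above: the estimate lands in exactly one of `total_estimated_cells` and `total_estimated_cells_redundant`, depending on whether the suspension is a leaf of the bundle graph, so either field can later be summed across documents without double counting. A minimal, self-contained sketch of just that comprehension (inputs are made up)::

    cell_count_fields = [
        ('total_estimated_cells', True),
        ('total_estimated_cells_redundant', False)
    ]

    def split_cell_count(estimated_cell_count: int, is_leaf: bool) -> dict[str, int]:
        # The count is attributed to exactly one field; the other gets 0
        return {
            field: estimated_cell_count if is_leaf_field == is_leaf else 0
            for field, is_leaf_field in cell_count_fields
        }

    assert split_cell_count(5000, is_leaf=True) == {
        'total_estimated_cells': 5000,
        'total_estimated_cells_redundant': 0
    }
    assert split_cell_count(5000, is_leaf=False) == {
        'total_estimated_cells': 0,
        'total_estimated_cells_redundant': 5000
    }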
def _organoid(self, organoid: api.Organoid) -> MutableJSON: - return { - **self._biomaterial(organoid), - 'model_organ': organoid.model_organ, - 'model_organ_part': organoid.model_organ_part - } - - def _is_intermediate_matrix(self, file: api.File) -> bool | None: - if file.is_matrix: - if isinstance(file, api.SupplementaryFile): - # Non-organic CGM - is_intermediate = False - elif isinstance(file, api.AnalysisFile): - if ( - any(isinstance(p, api.AnalysisProcess) for p in file.to_processes.values()) - # As per DCP/2 System Design, intermediate matrices generated by - # DCP/2 analysis do not carry a submitter ID. Also see Submitter.for_file - or (file.submitter_id is None and Submitter.for_file(file) == Submitter.dcp2) - ): - # Intermediate DCP/2-generated matrix - is_intermediate = True - else: - # Organic CGM or final DCP/2-generated matrix - is_intermediate = False - else: - assert False, file - else: - # Not a matrix - is_intermediate = None - return is_intermediate - - @classmethod - def _file_base_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'content-type': null_str, - 'indexed': null_bool, - 'name': null_str, - 'crc32c': null_str, - 'sha256': null_str, - 'size': null_int, - 'uuid': pass_thru_uuid4, - 'drs_uri': null_str, - 'version': null_str, - 'file_type': null_str, - 'file_format': null_str, - 'content_description': [null_str], - 'is_intermediate': null_bool, - 'file_source': null_str, - '_type': null_str, - 'read_index': null_str, - 'lane_index': null_int, - 'matrix_cell_count': null_int - } - - def _file_base(self, file: api.File) -> MutableJSON: - # noinspection PyDeprecation - return { - **self._entity(file), - 'content-type': file.manifest_entry.content_type, - 'indexed': file.manifest_entry.indexed, - 'name': file.manifest_entry.name, - 'crc32c': file.manifest_entry.crc32c, - 'sha256': file.manifest_entry.sha256, - 'size': file.manifest_entry.size, - 'uuid': file.manifest_entry.uuid, - 'drs_uri': self.bundle.drs_uri(file.manifest_entry.json), - 'version': file.manifest_entry.version, - 'file_type': file.schema_name, - 'file_format': file.file_format, - 'content_description': sorted(file.content_description), - 'is_intermediate': self._is_intermediate_matrix(file), - 'file_source': Submitter.title_for_file(file), - '_type': 'file', - **( - { - 'read_index': file.read_index, - 'lane_index': file.lane_index - } if isinstance(file, api.SequenceFile) else { - } - ), - **( - { - 'matrix_cell_count': file.matrix_cell_count - } if isinstance(file, api.AnalysisFile) else { - } - ), - } - - @classmethod - def _file_types(cls) -> FieldTypes: - return { - **cls._file_base_types(), - # Pass through field added by FileAggregator, will never be None - 'count': pass_thru_int, - 'related_files': cls._related_file_types(), - } - - def _file(self, - file: api.File, - related_files: Iterable[api.File] = () - ) -> MutableJSON: - # noinspection PyDeprecation - return { - **self._file_base(file), - 'related_files': list(map(self._related_file, related_files)), - } - - @classmethod - def _related_file_types(cls) -> FieldTypes: - return cls._file_base_types() - - def _related_file(self, file: api.File) -> MutableJSON: - return self._file_base(file) - - @classmethod - def _analysis_protocol_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'workflow': null_str - } - - def _analysis_protocol(self, protocol: api.AnalysisProtocol) -> MutableJSON: - return { - **self._entity(protocol), - 'workflow': protocol.protocol_id - } - - @classmethod - def 
_imaging_protocol_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - # Pass through counter used to produce a FrequencySetAccumulator - 'assay_type': pass_thru_json - } - - def _imaging_protocol(self, protocol: api.ImagingProtocol) -> MutableJSON: - return { - **self._entity(protocol), - 'assay_type': dict(Counter(probe.assay_type for probe in protocol.probe)) - } - - @classmethod - def _library_preparation_protocol_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'library_construction_approach': null_str, - 'nucleic_acid_source': null_str - } - - def _library_preparation_protocol(self, - protocol: api.LibraryPreparationProtocol - ) -> MutableJSON: - return { - **self._entity(protocol), - 'library_construction_approach': protocol.library_construction_method, - 'nucleic_acid_source': protocol.nucleic_acid_source - } - - @classmethod - def _sequencing_protocol_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - 'instrument_manufacturer_model': null_str, - 'paired_end': null_bool - } - - def _sequencing_protocol(self, protocol: api.SequencingProtocol) -> MutableJSON: - return { - **self._entity(protocol), - 'instrument_manufacturer_model': protocol.instrument_manufacturer_model, - 'paired_end': protocol.paired_end - } - - @classmethod - def _sequencing_process_types(cls) -> FieldTypes: - return { - **cls._entity_types(), - } - - def _sequencing_process(self, process: api.Process) -> MutableJSON: - return { - **self._entity(process), - } - - @classmethod - def _sequencing_input_types(cls) -> FieldTypes: - return { - **cls._biomaterial_types(), - 'sequencing_input_type': null_str, - } - - def _sequencing_input(self, sequencing_input: api.Biomaterial) -> MutableJSON: - return { - **self._biomaterial(sequencing_input), - 'sequencing_input_type': api.schema_names[type(sequencing_input)] - } - - @classmethod - def _sample_types(cls) -> FieldTypes: - return { - **cls._biomaterial_types(), - 'entity_type': null_str, - 'organ': null_str, - 'organ_part': [null_str], - 'model_organ': null_str, - 'model_organ_part': null_str, - 'effective_organ': null_str, - } - - class Sample: - entity_type: str - api_class: type[api.Biomaterial] - - @classmethod - def to_dict(cls, sample: api.Biomaterial) -> MutableJSON: - assert isinstance(sample, cls.api_class) - return { - 'document_id': str(sample.document_id), - 'biomaterial_id': str(sample.biomaterial_id), - 'entity_type': cls.entity_type, - } - - class SampleCellLine(Sample): - entity_type = 'cell_lines' - api_class = api.CellLine - - @classmethod - def to_dict(cls, cellline: api_class) -> MutableJSON: - return { - **super().to_dict(cellline), - 'organ': None, - 'organ_part': [], - 'model_organ': cellline.model_organ, - 'model_organ_part': None, - 'effective_organ': cellline.model_organ, - } - - class SampleOrganoid(Sample): - entity_type = 'organoids' - api_class = api.Organoid - - @classmethod - def to_dict(cls, organoid: api_class) -> MutableJSON: - return { - **super().to_dict(organoid), - 'organ': None, - 'organ_part': [], - 'model_organ': organoid.model_organ, - 'model_organ_part': organoid.model_organ_part, - 'effective_organ': organoid.model_organ, - } - - class SampleSpecimen(Sample): - entity_type = 'specimens' - api_class = api.SpecimenFromOrganism - - @classmethod - def to_dict(cls, specimen: api_class) -> MutableJSON: - return { - **super().to_dict(specimen), - 'organ': specimen.organ, - 'organ_part': sorted(specimen.organ_parts), - 'model_organ': None, - 'model_organ_part': None, - 'effective_organ': specimen.organ, - 
} - - sample_types: Mapping[Callable, type[Sample]] = { - _cell_line: SampleCellLine, - _organoid: SampleOrganoid, - _specimen: SampleSpecimen - } - - def _samples(self, samples: Iterable[api.Biomaterial]) -> MutableJSON: - """ - Returns inner entities representing the given samples, both as generic - 'samples' inner entities and as specific 'sample_{entity_type}' entities. - A 'samples' inner entity is a polymorphic structure containing - the properties common to all samples. This allows filtering on these - common properties regardless of the sample entity type. - """ - result = defaultdict(list) - for sample in samples: - for to_dict, sample_type in self.sample_types.items(): - if isinstance(sample, sample_type.api_class): - entity_type = f'sample_{sample_type.entity_type}' - result[entity_type].append(to_dict(self, sample)) - result['samples'].append(sample_type.to_dict(sample)) - break - else: - assert False, sample - return result - - @classmethod - def _matrix_types(cls) -> FieldTypes: - return { - 'document_id': null_str, - 'file': { - **cls._file_types(), - 'strata': null_str - } - } - - def _matrix(self, file: api.File) -> MutableJSON: - if isinstance(file, api.SupplementaryFile): - # Stratification values for supplementary files are - # provided in the 'file_description' field of the file JSON. - strata_string = file.json['file_description'] - elif isinstance(file, api.File): - # Stratification values for other file types are gathered by - # visiting the file and using values from the graph. - strata_string = self._build_strata_string(file) - else: - assert False, type(file) - return { - 'document_id': str(file.document_id), - # These values are grouped together in a dict so when the dicts are - # aggregated together we will have preserved the grouping of values. - 'file': { - **self._file(file), - 'strata': strata_string - } - } - - dimension_value_re = re.compile(r'[^,=;\n]+') - - def _build_strata_string(self, file): - visitor, samples = self._visit_file(file) - points = { - 'genusSpecies': { - genus_species - for donor in visitor.donors.values() - for genus_species in donor.genus_species - }, - 'developmentStage': { - donor.development_stage - for donor in visitor.donors.values() - if donor.development_stage is not None - }, - 'organ': { - sample.organ if hasattr(sample, 'organ') else sample.model_organ - for sample in samples.values() - }, - 'libraryConstructionApproach': { - protocol.library_construction_method - for protocol in visitor.library_preparation_protocols.values() - } - } - point_strings = [] - for dimension, values in points.items(): - if values: - for value in values: - assert self.dimension_value_re.fullmatch(value), value - point_strings.append(dimension + '=' + ','.join(sorted(values))) - return ';'.join(point_strings) - - @classmethod - def field_types(cls) -> FieldTypes: - """ - Field types outline the general shape of our documents. - """ - # FIXME: Not all information is captured. Lists of primitive types are - # represented, but lists of container types are not. Eventually, - # we want field_types to more accurately describe the shape of - # the documents, in particular the contributions. 
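Aside on `_build_strata_string` above: each non-empty dimension is serialized as `dimension=value1,value2`, points are joined with `;`, and the regex guards that no value smuggles in a separator character. In isolation, with made-up point values::

    import re

    dimension_value_re = re.compile(r'[^,=;\n]+')

    def build_strata_string(points: dict[str, set[str]]) -> str:
        point_strings = []
        for dimension, values in points.items():
            if values:
                for value in values:
                    # a separator character inside a value would corrupt the string
                    assert dimension_value_re.fullmatch(value), value
                point_strings.append(dimension + '=' + ','.join(sorted(values)))
        return ';'.join(point_strings)

    strata = build_strata_string({
        'genusSpecies': {'Homo sapiens'},
        'organ': {'brain', 'blood'},
        'developmentStage': set()  # empty dimensions are omitted
    })
    assert strata == 'genusSpecies=Homo sapiens;organ=blood,brain'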
- # https://github.com/DataBiosphere/azul/issues/2689 - return { - 'samples': cls._sample_types(), - 'sample_cell_lines': cls._cell_line_types(), - 'sample_organoids': cls._organoid_types(), - 'sample_specimens': cls._specimen_types(), - 'sequencing_inputs': cls._sequencing_input_types(), - 'specimens': cls._specimen_types(), - 'cell_suspensions': cls._cell_suspension_types(), - 'cell_lines': cls._cell_line_types(), - 'donors': cls._donor_types(), - 'organoids': cls._organoid_types(), - 'files': cls._file_types(), - 'analysis_protocols': cls._analysis_protocol_types(), - 'imaging_protocols': cls._imaging_protocol_types(), - 'library_preparation_protocols': cls._library_preparation_protocol_types(), - 'sequencing_protocols': cls._sequencing_protocol_types(), - 'sequencing_processes': cls._sequencing_process_types(), - 'total_estimated_cells': pass_thru_int, - 'matrices': cls._matrix_types(), - 'contributed_analyses': cls._matrix_types(), - 'projects': cls._project_types(), - 'dates': cls._date_types(), - } - - def _protocols(self, visitor) -> Mapping[str, JSONs]: - return { - p + 's': list(map(getattr(self, '_' + p), getattr(visitor, p + 's').values())) - for p in ( - 'analysis_protocol', - 'imaging_protocol', - 'library_preparation_protocol', - 'sequencing_protocol' - ) - } - - @classmethod - def validate_class(cls): - # Manifest generation depends on this: - assert cls._related_file_types().keys() <= cls._file_types().keys() - - @cached_property - def _api_project(self) -> api.Project: - return one(self.api_bundle.projects.values()) - - @classmethod - def inner_entity_id(cls, entity_type: EntityType, entity: JSON) -> EntityID: - return entity['document_id'] - - @classmethod - def reconcile_inner_entities(cls, - entity_type: EntityType, - *, - this: tuple[JSON, BundleFQID], - that: tuple[JSON, BundleFQID] - ) -> tuple[JSON, BundleFQID]: - this_entity, this_bundle = this - that_entity, that_bundle = that - if that_entity.keys() != this_entity.keys(): - mismatch = set(that_entity.keys()).symmetric_difference(this_entity) - log.warning('Document shape of `%s` this_entity `%s` ' - 'does not match between bundles %r and %r, ' - 'the mismatched properties being: %s', - entity_type, cls.inner_entity_id(entity_type, this_entity), - this_bundle, that_bundle, - mismatch) - return that if that_bundle.version > this_bundle.version else this - - -BaseTransformer.validate_class() - - -def _parse_zarr_file_name(file_name: str - ) -> tuple[bool, str | None, str | None]: - file_name = file_name.split('.zarr/') - if len(file_name) == 1: - return False, None, None - elif len(file_name) == 2: - zarr_name, sub_name = file_name - return True, zarr_name, sub_name - else: - assert False - - -class TransformerVisitor(api.EntityVisitor): - # Entities are tracked by ID to ensure uniqueness if an entity is visited - # twice while descending the entity DAG - specimens: dict[api.UUID4, api.SpecimenFromOrganism] - cell_suspensions: dict[api.UUID4, api.CellSuspension] - cell_lines: dict[api.UUID4, api.CellLine] - donors: dict[api.UUID4, api.DonorOrganism] - organoids: dict[api.UUID4, api.Organoid] - analysis_protocols: dict[api.UUID4, api.AnalysisProtocol] - imaging_protocols: dict[api.UUID4, api.ImagingProtocol] - library_preparation_protocols: dict[api.UUID4, api.LibraryPreparationProtocol] - sequencing_inputs: dict[api.UUID4, api.Biomaterial] - sequencing_protocols: dict[api.UUID4, api.SequencingProtocol] - sequencing_processes: dict[api.UUID4, api.Process] - files: dict[api.UUID4, api.File] - - def __init__(self) -> 
None: - self.specimens = {} - self.cell_suspensions = {} - self.cell_lines = {} - self.donors = {} - self.organoids = {} - self.analysis_protocols = {} - self.imaging_protocols = {} - self.library_preparation_protocols = {} - self.sequencing_inputs = {} - self.sequencing_protocols = {} - self.sequencing_processes = {} - self.files = {} - - def visit(self, entity: api.Entity) -> None: - if ( - isinstance(entity, api.Biomaterial) - and any(isinstance(protocol, api.SequencingProtocol) - for process in entity.to_processes.values() - for protocol in process.protocols.values()) - ): - self.sequencing_inputs[entity.document_id] = entity - if isinstance(entity, api.SpecimenFromOrganism): - self.specimens[entity.document_id] = entity - elif isinstance(entity, api.CellSuspension): - self.cell_suspensions[entity.document_id] = entity - elif isinstance(entity, api.CellLine): - self.cell_lines[entity.document_id] = entity - elif isinstance(entity, api.DonorOrganism): - self.donors[entity.document_id] = entity - elif isinstance(entity, api.Organoid): - self.organoids[entity.document_id] = entity - elif isinstance(entity, api.Process): - if entity.is_sequencing_process(): - self.sequencing_processes[entity.document_id] = entity - for protocol in entity.protocols.values(): - if isinstance(protocol, api.AnalysisProtocol): - self.analysis_protocols[protocol.document_id] = protocol - elif isinstance(protocol, api.ImagingProtocol): - self.imaging_protocols[protocol.document_id] = protocol - elif isinstance(protocol, api.LibraryPreparationProtocol): - self.library_preparation_protocols[protocol.document_id] = protocol - elif isinstance(protocol, api.SequencingProtocol): - self.sequencing_protocols[protocol.document_id] = protocol - elif isinstance(entity, api.File): - # noinspection PyDeprecation - file_name = entity.manifest_entry.name - is_zarr, zarr_name, sub_name = _parse_zarr_file_name(file_name) - # zarray files no longer exist in DCP2. This condition may no longer - # be needed to support them, but we don't want to risk removing it. - if not is_zarr or sub_name.endswith('.zattrs'): - self.files[entity.document_id] = entity - - @property - def entities(self) -> Iterable[EntityReference]: - # FIXME: Some replicas are still missing for HCA - # https://github.com/DataBiosphere/azul/issues/6597 - for entity_dict in vars(self).values(): - for entity in entity_dict.values(): - yield EntityReference(entity_type=entity.schema_name, - entity_id=str(entity.document_id)) - - -ENTITY = TypeVar('ENTITY', bound=api.Entity) - - -class PartitionedTransformer(BaseTransformer, Generic[ENTITY]): - - @abstractmethod - def _transform(self, - entities: Iterable[ENTITY] - ) -> Iterable[Contribution | Replica]: - """ - Transform the given outer entities into contributions. - """ - raise NotImplementedError - - @abstractmethod - def _entities(self) -> Iterable[ENTITY]: - """ - Return all outer entities of interest in the bundle. 
- """ - raise NotImplementedError - - def _entities_in(self, partition: BundlePartition) -> Iterator[ENTITY]: - return (e for e in self._entities() if partition.contains(e.document_id)) - - def estimate(self, partition: BundlePartition) -> int: - return ilen(self._entities_in(partition)) - - def transform(self, - partition: BundlePartition - ) -> Iterable[Contribution | Replica]: - return self._transform(generable(self._entities_in, partition)) - - -class FileTransformer(PartitionedTransformer[api.File], ReplicaTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'files' - - @classmethod - def hot_entity_types(cls) -> dict[EntityType, EntityType]: - return { - 'project': 'projects', - 'donor_organism': 'donors', - **{ - f'{protocol_type}_protocol': f'{protocol_type}_protocols' - for protocol_type in ['analysis', 'imaging', 'library_preparation', 'sequencing'] - } - } - - def _entities(self) -> Iterable[api.File]: - return self.api_bundle.not_stitched(self.api_bundle.files) - - def _transform(self, - files: Iterable[api.File] - ) -> Iterable[Contribution | Replica]: - zarr_stores: Mapping[str, list[api.File]] = self.group_zarrs(files) - for file in files: - file_name = file.manifest_entry.name - is_zarr, zarr_name, sub_name = _parse_zarr_file_name(file_name) - # zarray files no longer exist in DCP2. This condition may no longer - # be needed to support them, but we don't want to risk removing it. - if not is_zarr or sub_name.endswith('.zattrs'): - if is_zarr: - # This is the representative file, so add the related files - related_files = zarr_stores[zarr_name] - else: - related_files = () - visitor, samples = self._visit_file(file) - contents = dict(self._samples(samples.values()), - sequencing_inputs=list( - map(self._sequencing_input, visitor.sequencing_inputs.values()) - ), - specimens=list(map(self._specimen, visitor.specimens.values())), - cell_suspensions=list(map(self._cell_suspension, visitor.cell_suspensions.values())), - cell_lines=list(map(self._cell_line, visitor.cell_lines.values())), - donors=list(map(self._donor, visitor.donors.values())), - organoids=list(map(self._organoid, visitor.organoids.values())), - files=[self._file(file, related_files=related_files)], - **self._protocols(visitor), - sequencing_processes=list( - map(self._sequencing_process, visitor.sequencing_processes.values()) - ), - dates=[self._date(file)], - projects=[self._project(self._api_project)]) - # Supplementary file matrices provide stratification values that - # need to be reflected by inner entities in the contribution. - if isinstance(file, api.SupplementaryFile) and file.is_matrix: - if Submitter.category_for_file(file) in ( - SubmitterCategory.internal, - SubmitterCategory.external - ): - additional_contents = self.matrix_stratification_values(file) - for entity_type, values in additional_contents.items(): - contents[entity_type].extend(values) - file_id = file.ref.entity_id - yield self._contribution(contents, file_id) - if config.enable_replicas: - project_ref = self._api_project.ref - project_id = project_ref.entity_id - for ref in chain([project_ref, self.api_bundle.ref], visitor.entities): - file_hub = None if ref.entity_type in self.hot_entity_types() else file_id - yield self._replica(ref, file_hub=file_hub, root_hub=project_id) - - def matrix_stratification_values(self, file: api.File) -> JSON: - """ - Returns inner entity values (contents) read from the stratification - values provided by a supplementary file project-level matrix. 
- """ - contents = defaultdict(list) - file_description = file.json.get('file_description') - if file_description: - file_name = file.manifest_entry.name - strata = parse_strata(file_description) - for stratum in strata: - donor = {} - genus_species = stratum.get('genusSpecies') - if genus_species is not None: - donor['genus_species'] = sorted(genus_species) - development_stage = stratum.get('developmentStage') - if development_stage is not None: - donor['development_stage'] = sorted(development_stage) - if donor: - donor.update( - { - 'biomaterial_id': f'donor_organism_{file_name}', - } - ) - contents['donors'].append(donor) - organ = stratum.get('organ') - if organ is not None: - for i, one_organ in enumerate(sorted(organ)): - contents['specimens'].append( - { - 'biomaterial_id': f'specimen_from_organism_{i}_{file_name}', - 'organ': one_organ, - }, - ) - library = stratum.get('libraryConstructionApproach') - if library is not None: - contents['library_preparation_protocols'].append( - { - 'library_construction_approach': sorted(library), - } - ) - return contents - - def group_zarrs(self, - files: Iterable[api.File] - ) -> Mapping[str, list[api.File]]: - zarr_stores = defaultdict(list) - for file in files: - file_name = file.manifest_entry.name - is_zarr, zarr_name, sub_name = _parse_zarr_file_name(file_name) - if is_zarr: - # Leave the representative file out of the list since it's already in the manifest - if not sub_name.endswith('.zattrs'): - zarr_stores[zarr_name].append(file) - return zarr_stores - - - class CellSuspensionTransformer(PartitionedTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'cell_suspensions' - - def _entities(self) -> Iterable[api.CellSuspension]: - for biomaterial in self.api_bundle.biomaterials.values(): - if isinstance(biomaterial, api.CellSuspension): - yield biomaterial - - def _transform(self, - cell_suspensions: Iterable[api.CellSuspension] - ) -> Iterable[Contribution]: - for cell_suspension in cell_suspensions: - samples: dict[str, Sample] = dict() - self._find_ancestor_samples(cell_suspension, samples) - visitor = TransformerVisitor() - cell_suspension.accept(visitor) - cell_suspension.ancestors(visitor) - contents = dict(self._samples(samples.values()), - sequencing_inputs=list( - map(self._sequencing_input, visitor.sequencing_inputs.values()) - ), - specimens=list(map(self._specimen, visitor.specimens.values())), - cell_suspensions=[self._cell_suspension(cell_suspension)], - cell_lines=list(map(self._cell_line, visitor.cell_lines.values())), - donors=list(map(self._donor, visitor.donors.values())), - organoids=list(map(self._organoid, visitor.organoids.values())), - files=list(map(self._file, visitor.files.values())), - **self._protocols(visitor), - sequencing_processes=list( - map(self._sequencing_process, visitor.sequencing_processes.values()) - ), - dates=[self._date(cell_suspension)], - projects=[self._project(self._api_project)]) - yield self._contribution(contents, cell_suspension.ref.entity_id) - - - class SampleTransformer(PartitionedTransformer): - - @classmethod - def entity_type(cls) -> str: - return 'samples' - - @classmethod - def inner_entity_types(cls) -> frozenset[str]: - return frozenset([ - cls.entity_type(), - 'sample_cell_lines', - 'sample_organoids', - 'sample_specimens' - ]) - - def _entities(self) -> Iterable[Sample]: - samples: dict[str, Sample] = dict() - for file in self.api_bundle.not_stitched(self.api_bundle.files): - self._find_ancestor_samples(file, samples) - return samples.values() - - def 
_transform(self, samples: Iterable[Sample]) -> Iterable[Contribution]: - for sample in samples: - visitor = TransformerVisitor() - sample.accept(visitor) - sample.ancestors(visitor) - contents = dict(self._samples([sample]), - sequencing_inputs=list( - map(self._sequencing_input, visitor.sequencing_inputs.values()) - ), - specimens=list(map(self._specimen, visitor.specimens.values())), - cell_suspensions=list(map(self._cell_suspension, visitor.cell_suspensions.values())), - cell_lines=list(map(self._cell_line, visitor.cell_lines.values())), - donors=list(map(self._donor, visitor.donors.values())), - organoids=list(map(self._organoid, visitor.organoids.values())), - files=list(map(self._file, visitor.files.values())), - **self._protocols(visitor), - sequencing_processes=list( - map(self._sequencing_process, visitor.sequencing_processes.values()) - ), - dates=[self._date(sample)], - projects=[self._project(self._api_project)]) - yield self._contribution(contents, sample.ref.entity_id) - - -class BundleAsEntity(DatedEntity): - - def __init__(self, bundle: api.Bundle) -> None: - super().__init__() - self.document_id = bundle.uuid - # A bundle's version should be a sortable string, however we happen to - # know that all bundles in current deployments use a DCP/2 version - # string, so we use this to set the entity's date fields. - date = parse_dcp2_version(bundle.version) - self.update_date = date - self.submission_date = date - - -class SingletonTransformer(BaseTransformer, metaclass=ABCMeta): - """ - A transformer for entity types of which there is exactly one instance in - every bundle. - """ - - @property - def _singleton_id(self) -> api.UUID4: - return self._singleton_entity().document_id - - @abstractmethod - def _singleton_entity(self) -> DatedEntity: - raise NotImplementedError - - def _dated_entities(self) -> Iterable[DatedEntity]: - return self.api_bundle.not_stitched(self.api_bundle.entities) - - def estimate(self, partition: BundlePartition) -> int: - return int(partition.contains(self._singleton_id)) - - def transform(self, partition: BundlePartition) -> Iterable[Contribution]: - if partition.contains(self._singleton_id): - yield self._transform() - - def _transform(self) -> Contribution: - # Project entities are not explicitly linked in the graph. The mere - # presence of project metadata in a bundle indicates that all other - # entities in that bundle belong to that project. Because of that we - # can't rely on a visitor to collect the related entities but have to - # enumerate them explicitly. - # FIXME: https://github.com/DataBiosphere/azul/issues/3270 - # Comment doesn't match code behavior - # The enumeration should not include any - # stitched entities because those will be discovered when the stitched - # bundle is transformed. 
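For context on `BundleAsEntity` above: both of its date fields are derived from the bundle version. Assuming a DCP/2 version is an ISO 8601 UTC timestamp of the form `2021-02-09T12:30:00.000000Z` (the real parser lives in `azul.time`; this is only a sketch)::

    from datetime import datetime, timezone

    def parse_dcp2_version(version: str) -> datetime:
        # Sketch under the format assumption stated above
        return (datetime
                .strptime(version, '%Y-%m-%dT%H:%M:%S.%fZ')
                .replace(tzinfo=timezone.utc))

    date = parse_dcp2_version('2021-02-09T12:30:00.000000Z')
    assert (date.year, date.month) == (2021, 2) and date.tzinfo is timezone.utc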
- # - visitor = TransformerVisitor() - for specimen in self.api_bundle.specimens: - specimen.accept(visitor) - specimen.ancestors(visitor) - samples: dict[str, Sample] = dict() - for file in self.api_bundle.files.values(): - file.accept(visitor) - file.ancestors(visitor) - self._find_ancestor_samples(file, samples) - matrices = [ - self._matrix(file) - for file in visitor.files.values() - if ( - file.is_matrix - and not self._is_intermediate_matrix(file) - and Submitter.category_for_file(file) == SubmitterCategory.internal - ) - ] - contributed_analyses = [ - self._matrix(file) - for file in visitor.files.values() - if ( - (file.is_matrix or isinstance(file, api.AnalysisFile)) - and not self._is_intermediate_matrix(file) - and Submitter.category_for_file(file) == SubmitterCategory.external - ) - ] - - contents = dict(self._samples(samples.values()), - sequencing_inputs=list( - map(self._sequencing_input, visitor.sequencing_inputs.values()) - ), - specimens=list(map(self._specimen, visitor.specimens.values())), - cell_suspensions=list(map(self._cell_suspension, visitor.cell_suspensions.values())), - cell_lines=list(map(self._cell_line, visitor.cell_lines.values())), - donors=list(map(self._donor, visitor.donors.values())), - organoids=list(map(self._organoid, visitor.organoids.values())), - files=list(map(self._file, visitor.files.values())), - **self._protocols(visitor), - sequencing_processes=list( - map(self._sequencing_process, visitor.sequencing_processes.values()) - ), - matrices=matrices, - contributed_analyses=contributed_analyses, - dates=[self._date(self._singleton_entity())], - projects=[self._project(self._api_project)]) - return self._contribution(contents, str(self._singleton_id)) - - -class ProjectTransformer(SingletonTransformer): - - def _singleton_entity(self) -> DatedEntity: - return self._api_project - - @classmethod - def entity_type(cls) -> str: - return 'projects' - - -class BundleTransformer(SingletonTransformer): - - # FIXME: Some replicas are still missing for HCA - # https://github.com/DataBiosphere/azul/issues/6597 - - def _singleton_entity(self) -> DatedEntity: - return BundleAsEntity(self.api_bundle) - - @classmethod - def aggregator(cls, entity_type: EntityType) -> EntityAggregator | None: - if entity_type == 'files': - return None - else: - return super().aggregator(entity_type) - - @classmethod - def entity_type(cls) -> str: - return 'bundles' diff --git a/src/azul/plugins/metadata/hca/service/__init__.py b/src/azul/plugins/metadata/hca/service/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/azul/plugins/metadata/hca/service/aggregation.py b/src/azul/plugins/metadata/hca/service/aggregation.py deleted file mode 100644 index bb30ceb084..0000000000 --- a/src/azul/plugins/metadata/hca/service/aggregation.py +++ /dev/null @@ -1,153 +0,0 @@ -from typing import ( - Mapping, -) - -from opensearchpy import ( - Q, - Search, -) -from opensearchpy.helpers.aggs import ( - Agg, -) - -from azul import ( - cached_property, - config, -) -from azul.plugins import ( - FieldPath, - dotted, -) -from azul.service.elasticsearch_service import ( - AggregationStage, -) -from azul.types import ( - MutableJSON, -) - - -class HCAAggregationStage(AggregationStage): - - def _prepare_aggregation(self, *, facet: str, facet_path: FieldPath) -> Agg: - agg = super()._prepare_aggregation(facet=facet, facet_path=facet_path) - - if facet == 'project': - sub_path = dotted(self.plugin.field_mapping['projectId'], 'keyword') - 
agg.aggs['myTerms'].bucket(name='myProjectIds', - agg_type='terms', - field=sub_path, - size=config.terms_aggregation_size) - elif facet == 'fileFormat': - # FIXME: Use of shadow field is brittle - # https://github.com/DataBiosphere/azul/issues/2289 - def set_summary_agg(field: str, bucket: str) -> None: - path = dotted(self.plugin.field_mapping[field]) + '_' - agg.aggs['myTerms'].metric(bucket, 'sum', field=path) - agg.aggs['untagged'].metric(bucket, 'sum', field=path) - - set_summary_agg(field='fileSize', bucket='size_by_type') - set_summary_agg(field='matrixCellCount', bucket='matrix_cell_count_by_type') - - return agg - - -class HCASummaryAggregationStage(HCAAggregationStage): - - def prepare_request(self, request: Search) -> Search: - request = super().prepare_request(request) - entity_type = self.entity_type - - def add_filters_sum_agg(parent_field, parent_bucket, child_field, child_bucket): - parent_field_type = self.service.field_type(self.catalog, tuple(parent_field.split('.'))) - null_value = parent_field_type.to_index(None) - request.aggs.bucket( - parent_bucket, - 'filters', - filters={ - 'hasSome': Q('bool', must=[ - Q('exists', field=parent_field), # field exists... - Q('bool', must_not=[ # ...and is not zero or null - Q('terms', **{parent_field: [0, null_value]}) - ]) - ]) - }, - other_bucket_key='hasNone', - ).metric( - child_bucket, - 'sum', - field=child_field - ) - - if entity_type == 'files': - # Add a total file size aggregate - request.aggs.metric('totalFileSize', - 'sum', - field='contents.files.size_') - elif entity_type == 'cell_suspensions': - # Add a cell count aggregate per organ - request.aggs.bucket( - 'cellCountSummaries', - 'terms', - field='contents.cell_suspensions.organ.keyword', - size=config.terms_aggregation_size - ).bucket( - 'cellCount', - 'sum', - field='contents.cell_suspensions.total_estimated_cells_' - ) - elif entity_type == 'samples': - # Add an organ aggregate to the Elasticsearch request - request.aggs.bucket('organTypes', - 'terms', - field='contents.samples.effective_organ.keyword', - size=config.terms_aggregation_size) - elif entity_type == 'projects': - # Add project cell count sum aggregates from the projects with and - # without any cell suspension cell counts. - add_filters_sum_agg(parent_field='contents.cell_suspensions.total_estimated_cells', - parent_bucket='cellSuspensionCellCount', - child_field='contents.projects.estimated_cell_count_', - child_bucket='projectCellCount') - # Add cell suspensions cell count sum aggregates from projects - # with and without a project level estimated cell count. 
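Aside on the `hasSome` filter built by `add_filters_sum_agg` above (its second invocation follows below): it combines an `exists` query with a negated `terms` query so that both zero and the index's null sentinel are excluded. A sketch of just that query, with `-1` as a made-up stand-in for the null sentinel::

    from opensearchpy import Q

    parent_field = 'contents.projects.estimated_cell_count'
    null_value = -1  # stand-in for whatever the field type maps None to

    has_some = Q('bool', must=[
        Q('exists', field=parent_field),  # the field is present...
        Q('bool', must_not=[              # ...and is neither zero nor null
            Q('terms', **{parent_field: [0, null_value]})
        ])
    ])
    print(has_some.to_dict())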
- add_filters_sum_agg(parent_field='contents.projects.estimated_cell_count', - parent_bucket='projectCellCount', - child_field='contents.cell_suspensions.total_estimated_cells_', - child_bucket='cellSuspensionCellCount') - else: - assert False, entity_type - - threshold = config.precision_threshold - for agg_name, cardinality in self._cardinality_aggregations.items(): - request.aggs.metric(agg_name, - 'cardinality', - field=cardinality + '.keyword', - precision_threshold=str(threshold)) - - self._annotate_aggs_for_translation(request) - request = request.extra(size=0) - return request - - @cached_property - def _cardinality_aggregations(self) -> Mapping[str, str]: - return { - 'samples': { - 'specimenCount': 'contents.specimens.document_id', - 'speciesCount': 'contents.donors.genus_species', - 'donorCount': 'contents.donors.document_id', - }, - 'projects': { - 'labCount': 'contents.projects.laboratory', - } - }.get(self.entity_type, {}) - - def process_response(self, response: MutableJSON) -> MutableJSON: - response = super().process_response(response) - result = response['aggregations'] - threshold = config.precision_threshold - - for agg_name in self._cardinality_aggregations: - agg_value = result[agg_name]['value'] - assert agg_value <= threshold * .9, (agg_name, agg_value, threshold) - - return result diff --git a/src/azul/plugins/metadata/hca/service/contributor_matrices.py b/src/azul/plugins/metadata/hca/service/contributor_matrices.py deleted file mode 100644 index 5e2f2df0eb..0000000000 --- a/src/azul/plugins/metadata/hca/service/contributor_matrices.py +++ /dev/null @@ -1,367 +0,0 @@ -from collections import ( - defaultdict, -) -from itertools import ( - chain, - product, -) -from typing import ( - Mapping, - Sequence, -) - -from azul.collections import ( - NestedDict, -) -from azul.types import ( - JSON, - JSONs, -) - -default_order_of_matrix_dimensions = [ - 'genusSpecies', - 'developmentStage', - 'organ', - 'libraryConstructionApproach', -] - - -def parse_strata(strata: str) -> JSONs: - """ - >>> from azul.doctests import assert_json - >>> def f(strata): - ... return assert_json(parse_strata(strata)) - - >>> f('a=A1;b=B1,B2') - [ - { - "a": [ - "A1" - ], - "b": [ - "B1", - "B2" - ] - } - ] - - >>> f('a=A1;b=B1\\na=A2;b=B2,B3') - [ - { - "a": [ - "A1" - ], - "b": [ - "B1" - ] - }, - { - "a": [ - "A2" - ], - "b": [ - "B2", - "B3" - ] - } - ] - - >>> f('') - Traceback (most recent call last): - ... - ValueError: not enough values to unpack (expected 2, got 1) - """ - return [ - { - dimension: values.split(',') - for dimension, values in (point.split('=') for point in stratum.split(';')) - } - for stratum in strata.split('\n') - ] - - -def make_stratification_tree(files: Sequence[Mapping[str, str]]) -> JSON: - """ - >>> from azul.doctests import assert_json - >>> def f(files): - ... return assert_json(make_stratification_tree(files)) - - >>> f( - ... [ - ... { - ... 'uuid': 'u', - ... 'version': 'v', - ... 'name': 'n', - ... 'size': 1, - ... 'source': 's', - ... 'strata': 'developmentStage=a;genusSpecies=b;organ=c' - ... } - ... ] - ... ) - { - "genusSpecies": { - "b": { - "developmentStage": { - "a": { - "organ": { - "c": [ - { - "uuid": "u", - "version": "v", - "name": "n", - "size": 1, - "source": "s", - "drs_uri": null - } - ] - } - } - } - } - } - } - - >>> f( - ... [ - ... { - ... 'uuid': 'u1', - ... 'version': 'v1', - ... 'name': 'n1', - ... 'size': 1, - ... 'source': 's1', - ... 'strata': 'genusSpecies=a;organ=b' - ... }, - ... { - ... 'uuid': 'u2', - ... 
'version': 'v2', - ... 'name': 'n2', - ... 'size': 2, - ... 'source': 's2', - ... 'strata': 'genusSpecies=a;organ=b' - ... } - ... ] - ... ) - { - "genusSpecies": { - "a": { - "organ": { - "b": [ - { - "uuid": "u1", - "version": "v1", - "name": "n1", - "size": 1, - "source": "s1", - "drs_uri": null - }, - { - "uuid": "u2", - "version": "v2", - "name": "n2", - "size": 2, - "source": "s2", - "drs_uri": null - } - ] - } - } - } - } - - >>> f( - ... [ - ... { - ... 'uuid': 'u1', - ... 'version': 'v1', - ... 'name': 'n1', - ... 'size': 1, - ... 'source': 's1', - ... 'strata': 'genusSpecies=a;organ=b\\ngenusSpecies=c;organ=d' - ... }, - ... { - ... 'uuid': 'u2', - ... 'version': 'v2', - ... 'name': 'n2', - ... 'size': 2, - ... 'source': 's2', - ... 'strata': 'genusSpecies=a,e;organ=f' - ... } - ... ] - ... ) - { - "genusSpecies": { - "a": { - "organ": { - "b": [ - { - "uuid": "u1", - "version": "v1", - "name": "n1", - "size": 1, - "source": "s1", - "drs_uri": null - } - ], - "f": [ - { - "uuid": "u2", - "version": "v2", - "name": "n2", - "size": 2, - "source": "s2", - "drs_uri": null - } - ] - } - }, - "c": { - "organ": { - "d": [ - { - "uuid": "u1", - "version": "v1", - "name": "n1", - "size": 1, - "source": "s1", - "drs_uri": null - } - ] - } - }, - "e": { - "organ": { - "f": [ - { - "uuid": "u2", - "version": "v2", - "name": "n2", - "size": 2, - "source": "s2", - "drs_uri": null - } - ] - } - } - } - } - - >>> f( - ... [ - ... { - ... 'uuid': 'u', - ... 'version': 'v', - ... 'name': 'n', - ... 'size': 1, - ... 'source': 's', - ... 'strata': 'genusSpecies=a;organ=b\\ngenusSpecies=a' - ... } - ... ] - ... ) - { - "genusSpecies": { - "a": { - "organ": { - "b": [ - { - "uuid": "u", - "version": "v", - "name": "n", - "size": 1, - "source": "s", - "drs_uri": null - } - ], - "Unspecified": [ - { - "uuid": "u", - "version": "v", - "name": "n", - "size": 1, - "source": "s", - "drs_uri": null - } - ] - } - } - } - } - - >>> f( - ... [ - ... { - ... 'uuid': 'u', - ... 'version': 'v', - ... 'name': 'n', - ... 'size': 1, - ... 'source': 's', - ... 'strata': 'genusSpecies=a;foo=b' - ... } - ... ] - ... ) - Traceback (most recent call last): - ... - ValueError: 'foo' is not in list - """ - assert len(set(file['uuid'] for file in files)) == len(files), files - - files = [ - { - **file, - # Each line in the stratification string represents a stratum, - # each stratum is a list of points, each point has a dimension - # and a list of values. Transform that string into a list of - # dictionaries. Each entry in those dictionaries maps the - # dimension to a value in that dimension. If dimension in a - # stratum has multiple values, the stratum is expanded into - # multiple strata, one per value. The strata are identical except - # in the dimension that had the multiple values. - 'strata': list(chain.from_iterable( - map(dict, product(*( - [(dimension, value) for value in values] - for dimension, values in stratum.items() - ))) - for stratum in parse_strata(file['strata']) - )), - 'drs_uri': file.get('drs_uri') - } - for file in files - ] - - def dimension_placement(dimension: str) -> tuple[int, int]: - dimension_index = default_order_of_matrix_dimensions.index(dimension) - return len(distinct_values[dimension]), dimension_index - - # To produce a tree with the most shared base branches possible we sort - # the dimensions by number of distinct values on each dimension, and - # secondarily sort according to the defined default ordering. 
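Before the sorting code below, an aside on the fan-out performed above: a stratum in which a dimension has several values is expanded into one single-valued stratum per value, via `itertools.product`. Reduced to its core::

    from itertools import product

    stratum = {'genusSpecies': ['a', 'e'], 'organ': ['f']}

    expanded = list(map(dict, product(*(
        [(dimension, value) for value in values]
        for dimension, values in stratum.items()
    ))))
    assert expanded == [
        {'genusSpecies': 'a', 'organ': 'f'},
        {'genusSpecies': 'e', 'organ': 'f'}
    ]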
- distinct_values = defaultdict(set) - for file in files: - for stratum in file['strata']: - for dimension, value in stratum.items(): - distinct_values[dimension].add(value) - sorted_dimensions = sorted(distinct_values, key=dimension_placement) - - # Ensure every stratum of every file has the same dimensions - for file in files: - for stratum in file['strata']: - # FIXME: https://github.com/DataBiosphere/azul/issues/2443 - # Instead of creating 'Unspecified' nodes the tree branches - # should not include those nodes, making the branches shorter - # and of different lengths. - for dimension in set(sorted_dimensions).difference(stratum.keys()): - stratum[dimension] = 'Unspecified' - - # Build the tree, as a nested dictionary. The keys in the dictionary - # alternate between dimensions and values. The leaves of the tree are - # lists of matrix files. If a matrix covers multiple strata, it will occur - # multiple times in the tree. - tree = NestedDict(2 * len(sorted_dimensions) - 1, list) - for file in files: - for stratum in file['strata']: - node = tree - for dimension in sorted_dimensions: - value = stratum[dimension] - node = node[dimension][value] - node.append({k: v for k, v in file.items() if k != 'strata'}) - - return tree.to_dict() diff --git a/src/azul/plugins/metadata/hca/service/filter.py b/src/azul/plugins/metadata/hca/service/filter.py deleted file mode 100644 index c2ba76ab88..0000000000 --- a/src/azul/plugins/metadata/hca/service/filter.py +++ /dev/null @@ -1,10 +0,0 @@ -from azul.service.elasticsearch_service import ( - FilterStage, -) - - -class HCAFilterStage(FilterStage): - - @property - def _limit_access(self) -> bool: - return self.entity_type != 'projects' diff --git a/src/azul/plugins/metadata/hca/service/response.py b/src/azul/plugins/metadata/hca/service/response.py deleted file mode 100644 index 0ab6f72243..0000000000 --- a/src/azul/plugins/metadata/hca/service/response.py +++ /dev/null @@ -1,578 +0,0 @@ -from itertools import ( - permutations, - product, -) -import logging -from typing import ( - Callable, - Mapping, - Sequence, - TypeVar, - TypedDict, - cast, -) - -from more_itertools import ( - one, -) - -from azul import ( - cached_property, -) -from azul.plugins import ( - SpecialFields, -) -from azul.plugins.metadata.hca.service.contributor_matrices import ( - make_stratification_tree, -) -from azul.service.elasticsearch_service import ( - ResponsePagination, - ResponseTriple, -) -from azul.service.repository_service import ( - SearchResponseStage, - SummaryResponseStage, -) -from azul.strings import ( - to_camel_case, -) -from azul.types import ( - AnyJSON, - JSON, - JSONs, - MutableJSON, - MutableJSONs, -) - -log = logging.getLogger(__name__) - - -class ValueAndUnit(TypedDict): - value: str - unit: str - - -class Term(TypedDict): - count: int - term: str | ValueAndUnit | None - - -class ProjectTerm(Term): - projectId: list[str] - - -class Terms(TypedDict): - terms: list[Term] - total: int - # FIXME: Remove type from termsFacets in /index responses - # https://github.com/DataBiosphere/azul/issues/2460 - type: str - - -class FileTypeSummary(TypedDict): - format: str - count: int - totalSize: float - matrixCellCount: float - - -class FileTypeSummaryForHit(FileTypeSummary): - fileSource: list[str | None] - isIntermediate: bool - contentDescription: list[str | None] - - -class OrganCellCountSummary(TypedDict): - organType: list[str | None] - countOfDocsWithOrganType: int - totalCellCountByOrgan: float - - -class Hit(TypedDict): - protocols: JSONs - entryId: str - 
sources: JSONs - projects: JSONs - samples: JSONs - specimens: JSONs - cellLines: JSONs - donorOrganisms: JSONs - organoids: JSONs - cellSuspensions: JSONs - dates: JSONs - - -class CompleteHit(Hit): - bundles: JSONs - files: JSONs - - -class SummarizedHit(Hit): - fileTypeSummaries: list[FileTypeSummary] - - -class SearchResponse(TypedDict): - hits: list[SummarizedHit | CompleteHit] - pagination: ResponsePagination - termFacets: dict[str, Terms] - - -class SummaryResponse(TypedDict): - projectCount: int - specimenCount: int - speciesCount: int - fileCount: int - totalFileSize: float - donorCount: int - labCount: int - organTypes: list[str] - fileTypeSummaries: list[FileTypeSummary] - cellCountSummaries: list[OrganCellCountSummary] - projects: JSONs - - -T = TypeVar('T') - - -class HCASummaryResponseStage(SummaryResponseStage): - - @property - def aggs_by_authority(self) -> Mapping[str, Sequence[str]]: - return { - 'files': [ - 'totalFileSize', - 'fileFormat', - ], - 'samples': [ - 'organTypes', - 'donorCount', - 'specimenCount', - 'speciesCount' - ], - 'projects': [ - 'project', - 'labCount', - 'cellSuspensionCellCount', - 'projectCellCount', - ], - 'cell_suspensions': [ - 'cellCountSummaries', - ] - } - - def process_response(self, response: JSON) -> SummaryResponse: - response = self.make_response(response) - self._validate_response(response) - return response - - def _validate_response(self, response: SummaryResponse): - for total, summary_field in ( - (response['totalFileSize'], 'totalSize'), - (response['fileCount'], 'count') - ): - summaries = cast(JSONs, response['fileTypeSummaries']) - summary_total = sum(summary[summary_field] for summary in summaries) - assert total == summary_total, (total, summary_total) - - def make_response(self, aggs: JSON) -> SummaryResponse: - def agg_value(*path: str) -> AnyJSON: - agg = aggs - for name in path: - agg = agg[name] - return agg - - def agg_values(function: Callable[[JSON], T], *path: str) -> list[T]: - values = agg_value(*path) - assert isinstance(values, list) - return list(map(function, values)) - - bools = [False, True] - cell_counts = { - child: { - (parent, present): agg_value(parent + 'CellCount', - 'buckets', - 'hasSome' if present else 'hasNone', - child + 'CellCount', - 'value') - for present in bools - } - for parent, child in permutations(['project', 'cellSuspension']) - } - - def file_type_summary(bucket: JSON) -> FileTypeSummary: - return FileTypeSummary( - count=bucket['doc_count'], - totalSize=bucket['size_by_type']['value'], - matrixCellCount=bucket['matrix_cell_count_by_type']['value'], - format=bucket['key'] - ) - - def organ_cell_count_summary(bucket: JSON) -> OrganCellCountSummary: - return OrganCellCountSummary( - organType=[bucket['key']], - countOfDocsWithOrganType=bucket['doc_count'], - totalCellCountByOrgan=bucket['cellCount']['value'] - ) - - def organ_type(bucket: JSON) -> str: - return bucket['key'] - - return SummaryResponse(projectCount=agg_value('project', 'doc_count'), - specimenCount=agg_value('specimenCount', 'value'), - speciesCount=agg_value('speciesCount', 'value'), - fileCount=agg_value('fileFormat', 'doc_count'), - totalFileSize=agg_value('totalFileSize', 'value'), - donorCount=agg_value('donorCount', 'value'), - labCount=agg_value('labCount', 'value'), - organTypes=agg_values(organ_type, 'organTypes', 'buckets'), - fileTypeSummaries=agg_values(file_type_summary, - 'fileFormat', - 'myTerms', - 'buckets'), - cellCountSummaries=agg_values(organ_cell_count_summary, - 'cellCountSummaries', - 'buckets'), - 
projects=[ - { - 'projects': { - 'estimatedCellCount': ( - cell_counts['project']['cellSuspension', project_present] - if cs_present else None - ) - }, - 'cellSuspensions': { - 'totalCells': ( - cell_counts['cellSuspension']['project', cs_present] - if project_present else None - ) - } - } - for project_present, cs_present in product(bools, bools) - if project_present or cs_present - ]) - - -class HCASearchResponseStage(SearchResponseStage): - - def process_response(self, response: ResponseTriple) -> SearchResponse: - hits, pagination, aggs = response - return SearchResponse(pagination=pagination, - termFacets=self.make_facets(aggs), - hits=self.make_hits(hits)) - - def make_bundles(self, entry) -> MutableJSONs: - return [ - { - self._special_fields.bundle_uuid: b['uuid'], - self._special_fields.bundle_version: b['version'] - } - for b in entry['bundles'] - ] - - def make_sources(self, entry) -> MutableJSONs: - return [ - { - self._special_fields.source_id: s['id'], - self._special_fields.source_spec: s['spec'] - } - for s in entry['sources'] - ] - - @cached_property - def _special_fields(self) -> SpecialFields: - return self.plugin.special_fields - - def make_protocols(self, entry) -> MutableJSONs: - return [ - *( - { - 'workflow': p.get('workflow', None), - } - for p in entry['contents']['analysis_protocols'] - ), - *( - { - 'assayType': p.get('assay_type', None), - } - for p in entry['contents']['imaging_protocols'] - ), - *( - { - 'libraryConstructionApproach': p.get('library_construction_approach', None), - 'nucleicAcidSource': p.get('nucleic_acid_source', None), - } - for p in entry['contents']['library_preparation_protocols']), - *( - { - 'instrumentManufacturerModel': p.get('instrument_manufacturer_model', None), - 'pairedEnd': p.get('paired_end', None), - } - for p in entry['contents']['sequencing_protocols'] - ) - ] - - def make_dates(self, entry) -> MutableJSONs: - return [ - { - 'aggregateLastModifiedDate': dates['aggregate_last_modified_date'], - 'aggregateSubmissionDate': dates['aggregate_submission_date'], - 'aggregateUpdateDate': dates['aggregate_update_date'], - 'lastModifiedDate': dates['last_modified_date'], - 'submissionDate': dates['submission_date'], - 'updateDate': dates['update_date'], - } - for dates in entry['contents']['dates'] - ] - - def make_projects(self, entry) -> MutableJSONs: - projects = [] - contents = entry['contents'] - for project in contents['projects']: - translated_project = { - 'projectId': project['document_id'], - 'projectTitle': project.get('project_title'), - 'projectShortname': project['project_short_name'], - 'laboratory': sorted(set(project.get('laboratory', [None]))), - 'estimatedCellCount': project['estimated_cell_count'], - 'isTissueAtlasProject': project['is_tissue_atlas_project'], - 'tissueAtlas': project.get('tissue_atlas'), - 'bionetworkName': project['bionetwork_name'], - 'dataUseRestriction': project.get('data_use_restriction'), - 'duosId': project.get('duos_id') - } - if self.entity_type == 'projects': - translated_project['projectDescription'] = project.get('project_description', []) - contributors = project.get('contributors', []) # list of dict - translated_project['contributors'] = contributors - publications = project.get('publications', []) # list of dict - translated_project['publications'] = publications - for contributor in contributors: - for key in list(contributor.keys()): - contributor[to_camel_case(key)] = contributor.pop(key) - for publication in publications: - for key in list(publication.keys()): - 
publication[to_camel_case(key)] = publication.pop(key) - translated_project['supplementaryLinks'] = project.get('supplementary_links', [None]) - translated_project['matrices'] = self.make_matrices_(contents['matrices']) - translated_project['contributedAnalyses'] = self.make_matrices_(contents['contributed_analyses']) - translated_project['accessions'] = project.get('accessions', [None]) - projects.append(translated_project) - return projects - - # FIXME: Move this to during aggregation - # https://github.com/DataBiosphere/azul/issues/2415 - - def make_matrices_(self, matrices: JSONs) -> JSON: - files = [] - if matrices: - for file in cast(JSONs, one(matrices)['file']): - translated_file = { - **self.make_translated_file(file), - 'strata': file['strata'] - } - files.append(translated_file) - return make_stratification_tree(files) - - def make_files(self, entry: JSON) -> JSONs: - files = [] - for _file in entry['contents']['files']: - translated_file = self.make_translated_file(_file) - files.append(translated_file) - return files - - def make_translated_file(self, file: JSON) -> JSON: - translated_file = { - 'contentDescription': file.get('content_description'), - 'format': file.get('file_format'), - 'isIntermediate': file.get('is_intermediate'), - 'name': file.get('name'), - 'sha256': file.get('sha256'), - 'size': file.get('size'), - 'fileSource': file.get('file_source'), - 'uuid': file.get('uuid'), - 'version': file.get('version'), - 'matrixCellCount': file.get('matrix_cell_count'), - 'drs_uri': file.get('drs_uri') - } - return translated_file - - def make_specimen(self, specimen) -> MutableJSON: - return { - 'id': specimen['biomaterial_id'], - 'organ': specimen.get('organ', None), - 'organPart': specimen.get('organ_part', None), - 'disease': specimen.get('disease', None), - 'preservationMethod': specimen.get('preservation_method', None), - 'source': specimen.get('_source', None) - } - - def make_specimens(self, entry) -> MutableJSONs: - return [self.make_specimen(specimen) for specimen in entry['contents']['specimens']] - - cell_suspension_fields = [ - ('organ', 'organ'), - ('organPart', 'organ_part'), - ('selectedCellType', 'selected_cell_type'), - ('totalCells', 'total_estimated_cells'), - ('totalCellsRedundant', 'total_estimated_cells_redundant') - ] - - def make_cell_suspension(self, cell_suspension) -> MutableJSON: - return { - k: cell_suspension.get(v, None) - for k, v in self.cell_suspension_fields - } - - def make_cell_suspensions(self, entry) -> MutableJSONs: - return [self.make_cell_suspension(cs) for cs in entry['contents']['cell_suspensions']] - - def make_cell_line(self, cell_line) -> MutableJSON: - return { - 'id': cell_line['biomaterial_id'], - 'cellLineType': cell_line.get('cell_line_type', None), - 'modelOrgan': cell_line.get('model_organ', None), - } - - def make_cell_lines(self, entry) -> MutableJSONs: - return [self.make_cell_line(cell_line) for cell_line in entry['contents']['cell_lines']] - - def make_donor(self, donor) -> MutableJSON: - return { - 'id': donor['biomaterial_id'], - 'donorCount': donor.get('donor_count', None), - 'developmentStage': donor.get('development_stage', None), - 'genusSpecies': donor.get('genus_species', None), - 'organismAge': donor.get('organism_age', None), - 'organismAgeRange': donor.get('organism_age_range', None), # list of dict - 'biologicalSex': donor.get('biological_sex', None), - 'disease': donor.get('diseases', None) - } - - def make_donors(self, entry) -> MutableJSONs: - return [self.make_donor(donor) for donor in 
entry['contents']['donors']] - - def make_organoid(self, organoid) -> MutableJSON: - return { - 'id': organoid['biomaterial_id'], - 'modelOrgan': organoid.get('model_organ', None), - 'modelOrganPart': organoid.get('model_organ_part', None) - } - - def make_organoids(self, entry) -> MutableJSONs: - return [self.make_organoid(organoid) for organoid in entry['contents']['organoids']] - - def make_sample(self, sample, entity_dict, entity_type) -> MutableJSON: - is_aggregate = isinstance(sample['document_id'], list) - organ_prop = 'organ' if entity_type == 'specimens' else 'model_organ' - return { - 'sampleEntityType': [entity_type] if is_aggregate else entity_type, - 'effectiveOrgan': sample[organ_prop], - **entity_dict - } - - def make_samples(self, entry) -> MutableJSONs: - pieces = [ - (self.make_cell_line, 'cellLines', 'sample_cell_lines'), - (self.make_organoid, 'organoids', 'sample_organoids'), - (self.make_specimen, 'specimens', 'sample_specimens'), - ] - return [ - self.make_sample(sample, entity_fn(sample), entity_type) - for entity_fn, entity_type, sample_entity_type in pieces - for sample in entry['contents'].get(sample_entity_type, []) - ] - - def make_hits(self, hits: JSONs) -> MutableJSONs: - return list(map(self.make_hit, hits)) - - def make_hit(self, es_hit) -> MutableJSON: - hit = Hit(protocols=self.make_protocols(es_hit), - entryId=es_hit['entity_id'], - sources=self.make_sources(es_hit), - projects=self.make_projects(es_hit), - samples=self.make_samples(es_hit), - specimens=self.make_specimens(es_hit), - cellLines=self.make_cell_lines(es_hit), - donorOrganisms=self.make_donors(es_hit), - organoids=self.make_organoids(es_hit), - cellSuspensions=self.make_cell_suspensions(es_hit), - dates=self.make_dates(es_hit)) - if self.entity_type in ('files', 'bundles'): - hit = cast(CompleteHit, hit) - hit['bundles'] = self.make_bundles(es_hit) - hit['files'] = self.make_files(es_hit) - else: - hit = cast(SummarizedHit, hit) - - def file_type_summary(aggregate_file: JSON) -> FileTypeSummaryForHit: - summary = FileTypeSummaryForHit( - count=aggregate_file['count'], - fileSource=cast(list, aggregate_file['file_source']), - totalSize=aggregate_file['size'], - matrixCellCount=aggregate_file['matrix_cell_count'], - format=aggregate_file['file_format'], - isIntermediate=aggregate_file['is_intermediate'], - contentDescription=cast(list, aggregate_file['content_description']) - ) - assert isinstance(summary['format'], str), type(str) - assert summary['format'] - return summary - - hit['fileTypeSummaries'] = [ - file_type_summary(aggregate_file) - for aggregate_file in es_hit['contents']['files'] - ] - return hit - - def make_terms(self, agg) -> Terms: - def choose_entry(_term): - if 'key_as_string' in _term: - return _term['key_as_string'] - elif (term_key := _term['key']) is None: - return None - elif isinstance(term_key, bool): - return str(term_key).lower() - elif isinstance(term_key, dict): - return term_key - else: - return str(term_key) - - terms: list[Term] = [] - for bucket in agg['myTerms']['buckets']: - term = Term(term=choose_entry(bucket), - count=bucket['doc_count']) - try: - sub_agg = bucket['myProjectIds'] - except KeyError: - pass - else: - project_ids = [sub_bucket['key'] for sub_bucket in sub_agg['buckets']] - term = cast(ProjectTerm, term) - term['projectId'] = project_ids - terms.append(term) - - untagged_count = agg['untagged']['doc_count'] - - # Add the untagged_count to the existing termObj for a None value, or - # add a new one - if untagged_count > 0: - for term in 
terms: - if term['term'] is None: - term['count'] += untagged_count - untagged_count = 0 - break - if untagged_count > 0: - terms.append(Term(term=None, count=untagged_count)) - - return Terms(terms=terms, - total=0 if len(agg['myTerms']['buckets']) == 0 else agg['doc_count'], - # FIXME: Remove type from termsFacets in /index responses - # https://github.com/DataBiosphere/azul/issues/2460 - type='terms') - - def make_facets(self, aggs: JSON) -> MutableJSON: - facets = {} - for facet, agg in aggs.items(): - if facet != '_project_agg': # Filter out project specific aggs - facets[facet] = self.make_terms(agg) - return facets diff --git a/src/azul/plugins/repository/__init__.py b/src/azul/plugins/repository/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/azul/plugins/repository/canned/__init__.py b/src/azul/plugins/repository/canned/__init__.py deleted file mode 100644 index 093cc5a9c4..0000000000 --- a/src/azul/plugins/repository/canned/__init__.py +++ /dev/null @@ -1,308 +0,0 @@ -""" -This repository plugin allows reading from a canned staging area like the one in -the GitHub repo https://github.com/HumanCellAtlas/schema-test-data . - -NOTE: This plugin's purpose is for testing and verification of a canned staging -area, and should not be used to create catalogs on a deployment. It can however -be used with the `can_bundle.py` script to create a local canned bundle from the -files in the canned staging area. -""" -import logging -from pathlib import ( - Path, -) -from tempfile import ( - TemporaryDirectory, -) -import time - -from furl import ( - furl, -) - -from azul import ( - R, - lru_cache, -) -from azul.auth import ( - Authentication, -) -from azul.drs import ( - DRSClient, -) -from azul.http import ( - HasCachedHttpClient, -) -from azul.indexer import ( - SimpleSourceSpec, - SourceRef, - SourcedBundleFQID, -) -from azul.plugins import ( - RepositoryFileDownload, - RepositoryPlugin, -) -from azul.plugins.metadata.hca import ( - HCAFile, -) -from azul.plugins.metadata.hca.bundle import ( - HCABundle, -) -from azul.time import ( - parse_dcp2_version, -) -from azul.types import ( - JSON, -) -from humancellatlas.data.metadata.helpers.staging_area import ( - CannedStagingAreaFactory, - StagingArea, -) - -log = logging.getLogger(__name__) - - -class CannedSourceRef(SourceRef[SimpleSourceSpec]): - pass - - -class CannedBundleFQID(SourcedBundleFQID[CannedSourceRef]): - pass - - -class CannedBundle(HCABundle[CannedBundleFQID]): - - @classmethod - def canning_qualifier(cls) -> str: - return 'gh.hca' - - def drs_uri(self, manifest_entry: JSON) -> str | None: - return 'dss' - - -class Plugin(RepositoryPlugin[ - CannedBundle, - SimpleSourceSpec, - CannedSourceRef, - CannedBundleFQID - ], - HasCachedHttpClient): - - def list_sources(self, - authentication: Authentication | None - ) -> list[CannedSourceRef]: - return [ - CannedSourceRef(id=self._lookup_source_id(spec), spec=spec) - for spec in self.sources - ] - - def _lookup_source_id(self, spec: SimpleSourceSpec) -> str: - return str(spec) - - def parse_github_url(self, url: furl) -> tuple[furl, Path, str]: - """ - Parse a GitHub URL. - - :param url: A GitHub URL of the format - https://github.com/<OWNER>/<NAME>/tree/<REF>[/<PATH>]. Note - that REF can be the name of a branch, the name of a tag, or - a commit SHA. If REF contains special characters like `/`, - `?` or `#` they must be URL-encoded. This is especially - noteworthy for `/` in branch names.
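- For example, a branch named `feature/foo` (a hypothetical name, purely for illustration) would have to appear in the URL as `feature%2Ffoo`.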
- - :return: A tuple containing the URL of a GitHub repository, a relative - path inside that repository, and a Git ref. - - >>> plugin = Plugin(catalog='') - - >>> plugin.parse_github_url(furl('https://github.com/OWNER/NAME/tree/REF/tests')) - (furl('https://github.com/OWNER/NAME.git'), PosixPath('tests'), 'REF') - """ - assert url.scheme == 'https', R('Unexpected scheme', url) - assert url.host == 'github.com', R('Unexpected host', url) - owner, name, slug, ref, *path = url.path.segments - assert slug == 'tree', R('Unexpected slug', url) - remote_url = furl(url.origin) - remote_url.path.add((owner, f'{name}.git')) - return remote_url, Path(*path), ref - - @lru_cache - def staging_area(self, url: str) -> StagingArea: - """ - Process the contents of a staging area. - - :param url: The URL of a staging area located in a GitHub repository. - - :return: A StagingArea object containing the contents of the staging - area's JSON files. - """ - with TemporaryDirectory() as tmpdir: - remote_url, path, ref = self.parse_github_url(furl(url)) - factory = CannedStagingAreaFactory.clone_remote(remote_url, - Path(tmpdir), - ref) - return factory.load_staging_area(path) - - def count_bundles(self, source: CannedSourceRef) -> int: - staging_area = self.staging_area(source.spec.name) - if source.spec.prefix is None: - return len(staging_area.links) - else: - prefix = source.spec.prefix.common - assert prefix == prefix.lower(), source - return sum( - 1 - for links_id in staging_area.links - if links_id.lower().startswith(prefix) - ) - - def count_files(self, source: SimpleSourceSpec) -> int: - staging_area = self.staging_area(source.name) - if source.prefix is None: - return len(staging_area.descriptors) - else: - prefix = source.prefix.common - assert prefix == prefix.lower(), source - return sum( - 1 - for descriptor in staging_area.descriptors.values() - if descriptor.content['sha256'].lower().startswith(prefix) - ) - - def list_bundles(self, - source: CannedSourceRef, - prefix: str - ) -> list[CannedBundleFQID]: - self._assert_source(source) - self._assert_partition(source, prefix) - assert prefix == prefix.lower(), prefix - staging_area = self.staging_area(source.spec.name) - return [ - CannedBundleFQID(source=source, - uuid=link.uuid, - version=link.version) - for link in staging_area.links.values() - if link.uuid.lower().startswith(prefix) - ] - - def fetch_bundle(self, bundle_fqid: CannedBundleFQID) -> CannedBundle: - self._assert_source(bundle_fqid.source) - now = time.time() - staging_area = self.staging_area(bundle_fqid.source.spec.name) - version, manifest, metadata, links = staging_area.get_bundle_parts(bundle_fqid.uuid) - if bundle_fqid.version is None: - bundle_fqid = CannedBundleFQID(source=bundle_fqid.source, - uuid=bundle_fqid.uuid, - version=version) - bundle = CannedBundle(fqid=bundle_fqid, - manifest=manifest, - metadata=metadata, - links=links) - assert version == bundle.version, (version, bundle) - log.info('It took %.003fs to download bundle %s.%s', - time.time() - now, bundle.uuid, bundle.version) - return bundle - - def list_files(self, source: CannedSourceRef, prefix: str) -> list[HCAFile]: - self._assert_source(source) - self._assert_partition(source, prefix) - assert prefix == prefix.lower(), prefix - staging_area = self.staging_area(source.spec.name) - return [ - HCAFile.from_descriptor(descriptor.content, - uuid=file_uuid, - name=descriptor.content['file_name'], - drs_uri=None) - for file_uuid, descriptor in staging_area.descriptors.items() - if 
descriptor.content['sha256'].lower().startswith(prefix) - ] - - def _construct_file_url(self, url: furl, file_name: str) -> furl: - """ - >>> plugin = Plugin(catalog='') - >>> url = furl('https://github.com/OWNER/REPO/tree/REF/tests') - - >>> plugin._construct_file_url(url, 'foo.zip') - furl('https://github.com/OWNER/REPO/raw/REF/tests/data/foo.zip') - - >>> plugin._construct_file_url(url, '') - Traceback (most recent call last): - ... - AssertionError: R('file_name cannot be empty') - """ - assert url.path.segments[2] == 'tree', R( - 'Unexpected path', str(url)) - file_url = furl(url) - file_url.path.segments[2] = 'raw' - file_url.path.segments.append('data') - assert len(file_name) > 0, R( - 'file_name cannot be empty') - assert not file_name.endswith('/'), R( - "File name cannot end with '/'", file_name) - for segment in file_name.split('/'): - file_url.path.segments.append(segment) - return file_url - - def _direct_file_url(self, - file_uuid: str, - *, - file_version: str | None = None, - ) -> furl | None: - # Check all sources for the file. If a file_version was specified return - # when we find a match, otherwise continue checking all sources and - # return the URL for the match with the latest (largest) version. - found_version = None - found_url = None - for source_spec in self.sources: - staging_area = self.staging_area(source_spec.name) - try: - descriptor = staging_area.descriptors[file_uuid] - except KeyError: - continue - else: - staging_area_url = furl(source_spec.name) - actual_file_version = descriptor.content['file_version'] - if file_version: - if file_version == actual_file_version: - file_name = descriptor.content['file_name'] - return self._construct_file_url(staging_area_url, file_name) - else: - if found_version is None or actual_file_version > found_version: - file_name = descriptor.content['file_name'] - found_url = self._construct_file_url(staging_area_url, file_name) - found_version = actual_file_version - return found_url - - def file_download_class(self) -> type[RepositoryFileDownload]: - return CannedFileDownload - - def drs_client(self, - authentication: Authentication | None = None - ) -> DRSClient: - assert authentication is None, type(authentication) - return DRSClient(http_client=self._http_client) - - def validate_version(self, version: str) -> None: - parse_dcp2_version(version) - - -class CannedFileDownload(RepositoryFileDownload): - _location: furl | None = None - _retry_after: int | None = None - - def update(self, - plugin: RepositoryPlugin, - authentication: Authentication | None - ) -> None: - assert isinstance(plugin, Plugin) - url = plugin._direct_file_url(file_uuid=self.file.uuid, - file_version=self.file.version) - self._location = url - - @property - def location(self) -> str | None: - return None if self._location is None else str(self._location) - - @property - def retry_after(self) -> int | None: - return self._retry_after diff --git a/src/azul/plugins/repository/dss/__init__.py b/src/azul/plugins/repository/dss/__init__.py deleted file mode 100644 index 75e6a1eb69..0000000000 --- a/src/azul/plugins/repository/dss/__init__.py +++ /dev/null @@ -1,275 +0,0 @@ -import logging -import time -from typing import ( - NoReturn, -) -import urllib -import urllib.parse -from uuid import ( - UUID, - uuid5, -) - -import attrs -from furl import ( - furl, -) -from more_itertools import ( - one, -) -import requests - -from azul import ( - config, -) -from azul.auth import ( - Authentication, -) -from azul.collections import ( - adict, -) -from 
azul.deployment import ( - aws, -) -from azul.drs import ( - DRSClient, -) -from azul.http import ( - HasCachedHttpClient, -) -from azul.indexer import ( - SimpleSourceSpec, - SourceRef, - SourcedBundleFQID, -) -from azul.plugins import ( - RepositoryFileDownload, - RepositoryPlugin, -) -from azul.plugins.metadata.hca.bundle import ( - HCABundle, -) -from azul.time import ( - parse_dcp2_version, -) -from azul.types import ( - JSON, -) - -log = logging.getLogger(__name__) - - -class DSSSourceRef(SourceRef[SimpleSourceSpec]): - """ - Subclass of `Source` to create new namespace for source IDs. - """ - namespace: UUID = UUID('6925391e-6519-41d9-879f-c6307eb83c1c') - - @classmethod - def for_dss_source(cls, source: str): - # We hash the endpoint instead of using it verbatim to distinguish them - # within a document, which is helpful for testing. - spec = SimpleSourceSpec.parse(source) - return cls(id=cls.id_from_spec(spec), spec=spec) - - @classmethod - def id_from_spec(cls, spec: SimpleSourceSpec) -> str: - return str(uuid5(cls.namespace, spec.name)) - - -class DSSBundleFQID(SourcedBundleFQID[DSSSourceRef]): - pass - - -class DSSBundle(HCABundle[DSSBundleFQID]): - - @classmethod - def canning_qualifier(cls) -> str: - return 'dss.hca' - - def drs_uri(self, manifest_entry: JSON) -> str: - file_uuid = manifest_entry['uuid'] - file_version = manifest_entry['version'] - netloc = config.drs_domain or config.api_lambda_domain('service') - return str(furl(scheme='drs', - netloc=netloc, - path=(file_uuid,), - args={'version': file_version})) - - -class Plugin(RepositoryPlugin[ - DSSBundle, - SimpleSourceSpec, - DSSSourceRef, - DSSBundleFQID - ], - HasCachedHttpClient): - - def _lookup_source_id(self, spec: SimpleSourceSpec) -> str: - return DSSSourceRef.id_from_spec(spec) - - def count_bundles(self, source: SimpleSourceSpec) -> NoReturn: - assert False, 'DSS is EOL' - - def count_files(self, source: SimpleSourceSpec) -> NoReturn: - assert False, 'DSS is EOL' - - def list_sources(self, - authentication: Authentication | None - ) -> list[DSSSourceRef]: - return [ - DSSSourceRef(id=self._lookup_source_id(spec), spec=spec) - for spec in self.sources - ] - - def list_bundles(self, - source: DSSSourceRef, - prefix: str - ) -> NoReturn: - assert False, 'DSS is EOL' - - def fetch_bundle(self, bundle_fqid: DSSBundleFQID) -> NoReturn: - assert False, 'DSS is EOL' - - def list_files(self, source: DSSSourceRef, prefix: str) -> NoReturn: - assert False, 'DSS is EOL' - - def dss_subscription_query(self, prefix: str) -> JSON: - return { - "query": { - "bool": { - "must_not": [ - { - "term": { - "admin_deleted": True - } - } - ], - "must": [ - { - "exists": { - "field": "files.project_json" - } - }, - *self._prefix_clause(prefix) - ] - } - } - } - - def dss_deletion_subscription_query(self, prefix: str) -> JSON: - return { - "query": { - "bool": { - "must": [ - { - "term": { - "admin_deleted": True - } - }, - *self._prefix_clause(prefix) - ] - } - } - } - - def _prefix_clause(self, prefix): - return [ - { - 'prefix': { - 'uuid': prefix - } - } - ] if prefix else [] - - def _direct_file_url(self, - file_uuid: str, - *, - file_version: str | None = None, - replica: str | None = None, - token: str | None = None, - ) -> str | None: - dss_endpoint = one(self.sources).name - url = furl(dss_endpoint) - url.path.add(['files', file_uuid]) - url.query.add(adict(version=file_version, replica=replica, token=token)) - return str(url) - - def drs_client(self, - authentication: Authentication | None = None - ) -> DRSClient: - 
assert authentication is None, type(authentication) - return DRSClient(http_client=self._http_client) - - def file_download_class(self) -> type[RepositoryFileDownload]: - return DSSFileDownload - - def validate_version(self, version: str) -> None: - # Note that this validates against the DCP2 format instead of the DSS - # format (azul.dss.version_format). This is necessary due to commit - # 48ef9388 which manually updated all the canned DSS bundles to use - # DCP/2 version format. - parse_dcp2_version(version) - - -class DSSFileDownload(RepositoryFileDownload): - _location: str | None = None - _retry_after: int | None = None - - def update(self, - plugin: RepositoryPlugin, - authentication: Authentication | None - ) -> None: - self.file = attrs.evolve(self.file, drs_uri=None) # to shorten the retry URLs - if self.replica is None: - self.replica = 'aws' - assert isinstance(plugin, Plugin) - # noinspection PyProtectedMember - dss_url = plugin._direct_file_url(file_uuid=self.file.uuid, - file_version=self.file.version, - replica=self.replica, - token=self.token) - dss_response = requests.get(dss_url, allow_redirects=False) - if dss_response.status_code == 301: - retry_after = int(dss_response.headers.get('Retry-After')) - location = dss_response.headers['Location'] - - location = urllib.parse.urlparse(location) - query = urllib.parse.parse_qs(location.query, strict_parsing=True) - self.token = one(query['token']) - self.replica = one(query['replica']) - self.file = attrs.evolve(self.file, version=one(query['version'])) - self._retry_after = retry_after - elif dss_response.status_code == 302: - location = dss_response.headers['Location'] - # Remove once https://github.com/HumanCellAtlas/data-store/issues/1837 is resolved - if True: - location = urllib.parse.urlparse(location) - query = urllib.parse.parse_qs(location.query, strict_parsing=True) - expires = int(one(query['Expires'])) - bucket = location.netloc.partition('.')[0] - dss_endpoint = one(plugin.sources).name - assert bucket == aws.dss_checkout_bucket(dss_endpoint), bucket - with aws.direct_access_credentials(dss_endpoint, lambda_name='service'): - # FIXME: make region configurable (https://github.com/DataBiosphere/azul/issues/1560) - s3 = aws.client('s3', region_name='us-east-1') - params = { - 'Bucket': bucket, - 'Key': location.path[1:], - 'ResponseContentDisposition': 'attachment;filename=' + self.file.name, - } - location = s3.generate_presigned_url(ClientMethod=s3.get_object.__name__, - ExpiresIn=round(expires - time.time()), - Params=params) - self._location = location - else: - dss_response.raise_for_status() - assert False - - @property - def location(self) -> str | None: - return self._location - - @property - def retry_after(self) -> int | None: - return self._retry_after diff --git a/src/azul/plugins/repository/tdr.py b/src/azul/plugins/repository/tdr.py deleted file mode 100644 index 5b5e75d6e1..0000000000 --- a/src/azul/plugins/repository/tdr.py +++ /dev/null @@ -1,288 +0,0 @@ -from abc import ( - ABC, - abstractmethod, -) -from collections import ( - defaultdict, -) -import datetime -import logging -import time -from typing import ( - Callable, - Iterable, - TypeVar, -) - -from chalice import ( - UnauthorizedError, -) -from furl import ( - furl, -) - -from azul import ( - cache_per_thread, - require, -) -from azul.auth import ( - Authentication, - OAuth2, -) -from azul.bigquery import ( - BigQueryRows, - backtick, -) -from azul.drs import ( - AccessMethod, - DRSClient, -) -from azul.indexer import ( - Bundle, - 
SourcedBundleFQID, -) -from azul.plugins import ( - RepositoryFileDownload, - RepositoryPlugin, -) -from azul.strings import ( - longest_common_prefix, -) -from azul.terra import ( - TDRClient, - TDRSourceRef, - TDRSourceSpec, -) -from azul.time import ( - format_dcp2_datetime, - parse_dcp2_version, -) -from azul.types import ( - JSON, -) - -log = logging.getLogger(__name__) - - -class TDRBundleFQID(SourcedBundleFQID[TDRSourceRef]): - pass - - -class TDRBundle(Bundle[TDRBundleFQID], ABC): - - @classmethod - def canning_qualifier(cls): - return 'tdr' - - def drs_uri(self, manifest_entry: JSON) -> str | None: - return manifest_entry.get('drs_uri') - - -T = TypeVar('T') - -TDR_BUNDLE = TypeVar('TDR_BUNDLE', bound=TDRBundle) - - -class TDRPlugin[TDR_BUNDLE: TDRBundle, - TDR_BUNDLE_FQID: TDRBundleFQID]( - RepositoryPlugin[ - TDR_BUNDLE, - TDRSourceSpec, - TDRSourceRef, - TDR_BUNDLE_FQID - ] -): - - def _auth_fallback(self, - authentication: Authentication | None, - tdr_callback: Callable[[TDRClient], T] - ) -> T: - # The line below raises UnauthorizedError for invalid tokens. We don't - # want to fall back to anonymous authentication in that case. - tdr = self._user_authenticated_tdr(authentication) - try: - return tdr_callback(tdr) - except UnauthorizedError: - if authentication is None or tdr.is_registered(): - raise - else: - # Fall back to anonymous access if the request is authenticated - # using an unregistered account. - tdr = self._user_authenticated_tdr(None) - return tdr_callback(tdr) - - def list_sources(self, - authentication: Authentication | None - ) -> list[TDRSourceRef]: - configured_specs_by_name = {spec.name: spec for spec in self.sources} - # Filter by prefix of snapshot names in an attempt to speed up the - # listing by limiting the number of irrelevant snapshots returned. Note - # that TDR does a substring match, not a prefix match, but determining - # the longest common substring is complicated and, as of yet, I haven't - # found a trustworthy, reusable implementation. - filter = longest_common_prefix(configured_specs_by_name.keys()) - snapshots = self._auth_fallback(authentication, - lambda tdr: tdr.snapshot_names_by_id(filter=filter)) - snapshot_ids_by_name = { - name: id - for id, name in snapshots.items() - if name in configured_specs_by_name - } - return [ - TDRSourceRef(id=id, - spec=configured_specs_by_name[name]) - for name, id in snapshot_ids_by_name.items() - ] - - def list_source_ids(self, - authentication: Authentication | None - ) -> set[str]: - return self._auth_fallback(authentication, - lambda tdr: tdr.snapshot_ids()) - - @property - def tdr(self): - return self._tdr() - - # To utilize the caching of certain responses that's occurring within - # the TDR and DRS client instances (from the TDR API and identifiers.org, - # respectively), we need to cache these client instances. If we cached the - # client instances within the plugin instance, we would get one client - # instance per plugin instance. The plugin is instantiated frequently and in - # a variety of contexts. - # - # Because of that, caching the plugin instances would be a more invasive - # change than simply caching the client instances per plugin class. That's - # why these are class methods. The clients use urllib3, whose thread-safety - # is disputed (https://github.com/urllib3/urllib3/issues/1252), so have to - # cache client instances per-class AND per-thread. 
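- # As a minimal sketch of that pattern (with a hypothetical class that is
- # not part of this module), the decorator stack used below behaves like:
- #
- #     class Example:
- #         @classmethod
- #         @cache_per_thread
- #         def _client(cls):
- #             # constructed once per class and per thread, then reused
- #             return TDRClient.for_indexer()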
- - @classmethod - @cache_per_thread - def _tdr(cls): - return TDRClient.for_indexer() - - @classmethod - @cache_per_thread - def _user_authenticated_tdr(cls, - authentication: Authentication | None - ) -> TDRClient: - if authentication is None: - tdr = TDRClient.for_anonymous_user() - elif isinstance(authentication, OAuth2): - tdr = TDRClient.for_registered_user(authentication) - else: - raise PermissionError('Unsupported authentication format', - type(authentication)) - return tdr - - @classmethod - @cache_per_thread - def _drs_client(cls, - authentication: Authentication | None = None - ) -> DRSClient: - return cls._user_authenticated_tdr(authentication).drs_client() - - def _lookup_source_id(self, spec: TDRSourceSpec) -> str: - return self.tdr.lookup_source(spec) - - def fetch_bundle(self, bundle_fqid: TDRBundleFQID) -> TDR_BUNDLE: - self._assert_source(bundle_fqid.source) - now = time.time() - bundle = self._emulate_bundle(bundle_fqid) - log.info('It took %.003fs to download bundle %s.%s', - time.time() - now, bundle.uuid, bundle.version) - return bundle - - @classmethod - def format_version(cls, version: datetime.datetime) -> str: - return format_dcp2_datetime(version) - - def _run_sql(self, query) -> BigQueryRows: - return self.tdr.run_sql(query) - - def _full_table_name(self, source: TDRSourceSpec, table_name: str) -> str: - return source.qualify_table(table_name) - - @abstractmethod - def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDR_BUNDLE: - raise NotImplementedError - - def drs_client(self, - authentication: Authentication | None = None - ) -> DRSClient: - return self._drs_client(authentication) - - def file_download_class(self) -> type[RepositoryFileDownload]: - return TDRFileDownload - - def validate_version(self, version: str) -> None: - parse_dcp2_version(version) - - def find_in_source(self, - source: TDRSourceSpec, - string: str - ) -> Iterable[JSON]: - log.info('Validating snapshot %s', source) - query = f''' - SELECT table_name, column_name - FROM {backtick(self._full_table_name(source, 'INFORMATION_SCHEMA.COLUMNS'))} - ''' - table_columns = defaultdict(list) - for row in self._run_sql(query): - table_name, column_name = row['table_name'], row['column_name'] - assert isinstance(table_name, str), table_name - assert isinstance(column_name, str), column_name - table_columns[table_name].append(column_name) - for table_name, columns in table_columns.items(): - log.info('Validating table %s', table_name) - for column in columns: - query = f''' - SELECT datarepo_row_id, {column} - FROM {backtick(self._full_table_name(source, table_name))} - WHERE CONTAINS_SUBSTR({column}, {string!r}) - ''' - for row in self._run_sql(query): - match = { - 'catalog': self.catalog, - 'spec': str(source), - 'table': table_name, - 'column': column, - 'row_id': row['datarepo_row_id'], - 'value': row[column] - } - log.warning('Undesired string found: %r', match) - yield match - - -class TDRFileDownload(RepositoryFileDownload): - _location: str | None = None - - needs_drs_uri = True - - def update(self, - plugin: RepositoryPlugin, - authentication: Authentication | None - ) -> None: - require(self.replica is None or self.replica == 'gcp') - if self.file.drs_uri is None: - assert self.location is None, self - assert self.retry_after is None, self - else: - drs_client = plugin.drs_client(authentication) - access = drs_client.get_object(self.file.drs_uri, - access_method=AccessMethod.gs) - require(access.method is AccessMethod.https, access.method) - require(access.headers is None, 
access.headers) - signed_url = access.url - args = furl(signed_url).args - require('X-Goog-Signature' in args, args) - self._location = signed_url - - @property - def location(self) -> str | None: - return self._location - - @property - def retry_after(self) -> int | None: - return None diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py deleted file mode 100644 index 206aed5e39..0000000000 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ /dev/null @@ -1,940 +0,0 @@ -import datetime -from enum import ( - Enum, -) -import itertools -import logging -from operator import ( - itemgetter, -) -from typing import ( - AbstractSet, - Callable, - Iterable, -) -import uuid - -import attrs -from more_itertools import ( - one, -) - -from azul import ( - R, - cached_property, - config, - uuids, -) -from azul.bigquery import ( - BigQueryRow, - backtick, -) -from azul.collections import ( - singleton, -) -from azul.drs import ( - DRSURI, -) -from azul.indexer import ( - Prefix, -) -from azul.indexer.document import ( - EntityReference, - EntityType, -) -from azul.plugins.metadata.anvil import ( - AnvilFile, -) -from azul.plugins.metadata.anvil.bundle import ( - AnvilBundle, - EntityLink, - Key, - KeyLink, - KeyReference, -) -from azul.plugins.metadata.anvil.schema import ( - anvil_schema, -) -from azul.plugins.repository.tdr import ( - TDRBundle, - TDRBundleFQID, - TDRPlugin, -) -from azul.terra import ( - TDRSourceRef, - TDRSourceSpec, -) -from azul.types import ( - MutableJSON, - MutableJSONs, -) -from azul.uuids import ( - change_version, -) - -log = logging.getLogger(__name__) - -Keys = AbstractSet[KeyReference] -MutableKeys = set[KeyReference] -KeysByType = dict[EntityType, AbstractSet[Key]] -MutableKeysByType = dict[EntityType, set[Key]] -KeyLinks = set[KeyLink] - - -class BundleType(Enum): - """ - Unlike HCA, AnVIL has no inherent notion of a "bundle". Its data model is - strictly relational: each row in a table represents an entity, each entity - has a primary key, and entities reference each other via foreign keys. - During indexing, we dynamically construct bundles by querying each table in - the snapshot. This class enumerates the tables that require special - strategies for listing and fetching their bundles. - - An orphan is defined as an AnVIL entity that does not appear in any of - Azul's `/index/{entity_type}`. Bundles *can* contain orphans, but they will - only ever manifest as replicas in our index. A *local orphan* is an entity - in a bundle that is not referenced anywhere in that bundle's links. Local - orphans may or may not be true/global orphans (because they may still be - references in *other* bundles' links), but all global orphans are always - local orphans. Bundles only contain local orphans from the table that - matches the bundle's `table_name` attribute. - - Primary bundles are defined by a biosample entity, termed the *bundle - entity*. Each primary bundle includes all of the bundle entity's descendants - and all of those entities' ancestors. Descendants and ancestors are - discovered by iteratively following foreign keys. Biosamples were chosen to - act as the bundle entities for primary bundles based on a desirable balance - between the size and number of the resulting bundles as well as the degree - of overlap between them. 
The implementation of the graph traversal is - tightly coupled to this choice, and switching to a different bundle entity - type would require re-implementing much of the Plugin code. Primary bundles - consist of at least one biosample (the bundle entity), exactly one dataset - entity, and zero or more other entities of assorted types. Primary bundles - never contain local orphans because they are bijective to rows in the - biosample table. - - Supplementary bundles consist of batches of file entities, which may include - supplementary files. Supplementary files lack any foreign keys that would - associate them with any other entity. Each supplementary bundle also - includes a dataset entity, and we create synthetic links between the - supplementary files and the dataset. Without these links, the relationship - between these files and their parent dataset would not be properly - represented in the service response. Supplementary files therefore are never - local or global orphans. - - Normal (non-supplementary) files are not linked to the dataset and thus are - local orphans within these bundles. This is because these files may also - appear in primary bundles. If they do, then those bundles will contribute - them to the index alongside all of their linked entities. If they don't, - then they are global orphans. In either case, it would be pointless for a - supplementary bundle to emit contributions for them, hence we treat them as - orphans. - - DUOS bundles consist of a single dataset entity. This "entity" includes the - DUOS ID retrieved from TDR and dataset description retrieved from DUOS, - while a copy of the BigQuery row for this dataset is also included as an - orphan. We chose this design because there is only one dataset per snapshot, - which is referenced in all bundles. Therefore, only one request to DUOS per - *snapshot* is necessary. If the DUOS `description` were retrieved at the - same time as the other fields of the dataset entity, we would make one - request per *bundle* instead, potentially overloading the DUOS service. Our - solution is to retrieve `description` only in a bundle of this dedicated - DUOS type, once per snapshot, and merge it with the other dataset fields - during aggregation. As a result, `duos_id` cannot be included in file - manifests since there is only one DUOS bundle per dataset, and that bundle - only contributes to outer entities of the `datasets` type, not to entities - of the other types, such as files, which the manifest is generated from. - - All other bundles are replica bundles. Replica bundles consist of a batch of - rows from an arbitrary BigQuery table, which may or may not be described by - the AnVIL schema, and the snapshot's dataset entity. Replica bundles contain - no links and thus all of their entities are local orphans. - """ - primary = 'anvil_biosample' - supplementary = 'anvil_file' - duos = 'anvil_dataset' - - @classmethod - def is_batched(cls, table_name: str) -> bool: - """ - True if bundles for the table of the given name represent batches of - rows, or False if each bundle represents a single row. 
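-
- The dataset table that backs DUOS bundles is likewise not batched:
-
- >>> BundleType.is_batched(BundleType.duos.value)
- False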
- - >>> BundleType.is_batched(BundleType.primary.value) - False - - >>> BundleType.is_batched('anvil_activity') - True - """ - return table_name not in (cls.primary.value, cls.duos.value) - - -@attrs.frozen(kw_only=True, eq=False) -class TDRAnvilBundleFQID(TDRBundleFQID): - table_name: str - batch_prefix: str | None - - def __attrs_post_init__(self): - should_be_batched = BundleType.is_batched(self.table_name) - is_batched = self.is_batched - assert is_batched == should_be_batched, self - if is_batched: - assert len(self.batch_prefix) <= 8, self - - @property - def is_batched(self) -> bool: - return self.batch_prefix is not None - - -class TDRAnvilBundle(AnvilBundle[TDRAnvilBundleFQID], TDRBundle): - - @classmethod - def canning_qualifier(cls) -> str: - return super().canning_qualifier() + '.anvil' - - def add_entity(self, - entity: EntityReference, - version: str, - row: MutableJSON, - *, - is_orphan: bool = False - ) -> None: - target = self.orphans if is_orphan else self.entities - # In DUOS bundles, the dataset is represented as both an entity and an - # orphan - assert entity not in target, entity - metadata = dict(row, - version=version) - if entity.entity_type == 'anvil_file': - drs_uri = row['file_ref'] - # Validate URI syntax - DRSURI.parse(drs_uri) - metadata.update(drs_uri=drs_uri, - sha256='', - crc32='') - target[entity] = metadata - - def add_links(self, links: Iterable[EntityLink]): - self.links.update(links) - EntityLink.group_by_activity(self.links) - - -class Plugin(TDRPlugin[TDRAnvilBundle, TDRAnvilBundleFQID]): - - @cached_property - def _version(self): - return self.format_version(datetime.datetime(year=2022, - month=6, - day=1, - hour=0, - tzinfo=datetime.timezone.utc)) - - datarepo_row_uuid_version = 4 - batch_uuid_version = 5 - bundle_uuid_version = 10 - - def _batch_uuid(self, - source: TDRSourceSpec, - table_name: str, - batch_prefix: str - ) -> str: - namespace = uuid.UUID('b8b3ac80-e035-4904-8b02-2d04f9e9a369') - batch_uuid = uuid.uuid5(namespace, f'{source}:{table_name}:{batch_prefix}') - return change_version(str(batch_uuid), - self.batch_uuid_version, - self.bundle_uuid_version) - - def count_files(self, source: TDRSourceSpec) -> int: - prefix = '' if source.prefix is None else source.prefix.common - assert prefix == prefix.lower(), source - query = f''' - SELECT COUNT(*) AS count - FROM {backtick(self._full_table_name(source, 'anvil_file'))} - WHERE STARTS_WITH(LOWER(file_md5sum), {prefix!r}) - ''' - return one(self._run_sql(query))['count'] - - def count_bundles(self, source: TDRSourceSpec) -> int: - prefix = '' if source.prefix is None else source.prefix.common - assert prefix == prefix.lower(), source - primary_count = one(self._run_sql(f''' - SELECT COUNT(*) AS count - FROM {backtick(self._full_table_name(source, BundleType.primary.value))} - WHERE STARTS_WITH(LOWER(datarepo_row_id), {prefix!r}) - '''))['count'] - duos_count = 0 if config.duos_service_url is None else one(self._run_sql(f''' - SELECT COUNT(*) AS count - FROM {backtick(self._full_table_name(source, BundleType.duos.value))} - WHERE STARTS_WITH(LOWER(datarepo_row_id), {prefix!r}) - '''))['count'] - sizes_by_table = self._batch_tables(source, prefix) - batched_count = sum(batch_size for (_, batch_size) in sizes_by_table.values()) - return primary_count + duos_count + batched_count - - def list_bundles(self, - source: TDRSourceRef, - prefix: str - ) -> list[TDRAnvilBundleFQID]: - self._assert_source(source) - self._assert_partition(source, prefix) - assert prefix == prefix.lower(), prefix -
bundles = [] - spec = source.spec - - if config.duos_service_url is not None: - # We intentionally omit the WHERE clause for datasets in order to - # verify our assumption that each snapshot only contains rows for a - # single dataset. This verification is performed independently and - # concurrently for every partition, but only one partition actually - # emits the bundle. - row = one(self._run_sql(f''' - SELECT datarepo_row_id - FROM {backtick(self._full_table_name(spec, BundleType.duos.value))} - ''')) - dataset_row_id = row['datarepo_row_id'] - if dataset_row_id.startswith(prefix): - bundle_uuid = change_version(dataset_row_id, - self.datarepo_row_uuid_version, - self.bundle_uuid_version) - bundle_fqid = TDRAnvilBundleFQID(uuid=bundle_uuid, - version=self._version, - source=source, - table_name=BundleType.duos.value, - batch_prefix=None) - bundles.append(bundle_fqid) - for row in self._run_sql(f''' - SELECT datarepo_row_id - FROM {backtick(self._full_table_name(spec, BundleType.primary.value))} - WHERE STARTS_WITH(LOWER(datarepo_row_id), {prefix!r}) - '''): - bundle_uuid = change_version(row['datarepo_row_id'], - self.datarepo_row_uuid_version, - self.bundle_uuid_version) - bundle_fqid = TDRAnvilBundleFQID(uuid=bundle_uuid, - version=self._version, - source=source, - table_name=BundleType.primary.value, - batch_prefix=None) - bundles.append(bundle_fqid) - prefix_lengths_by_table = self._batch_tables(source.spec, prefix) - for table_name, (batch_prefix_length, _) in prefix_lengths_by_table.items(): - batch_prefixes = Prefix(common=prefix, - partition=batch_prefix_length - len(prefix)).partition_prefixes() - for batch_prefix in batch_prefixes: - bundle_uuid = self._batch_uuid(spec, table_name, batch_prefix) - bundles.append(TDRAnvilBundleFQID(uuid=bundle_uuid, - version=self._version, - source=source, - table_name=table_name, - batch_prefix=batch_prefix)) - return bundles - - def list_files(self, source: TDRSourceRef, prefix: str) -> list[AnvilFile]: - self._assert_source(source) - self._assert_partition(source, prefix) - batch = self._get_batch(source.spec, - 'anvil_file', - prefix, - key_column='file_md5sum') - return [ - AnvilFile(uuid=ref.entity_id, - name=row['file_name'], - version=self._version, - size=row['file_size'], - md5=row['file_md5sum'], - drs_uri=row['file_ref']) - for ref, row in batch - ] - - def _emulate_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: - if bundle_fqid.table_name == BundleType.primary.value: - log.info('Bundle %r is a primary bundle', bundle_fqid.uuid) - return self._primary_bundle(bundle_fqid) - elif bundle_fqid.table_name == BundleType.supplementary.value: - log.info('Bundle %r is a supplementary bundle', bundle_fqid.uuid) - return self._supplementary_bundle(bundle_fqid) - elif bundle_fqid.table_name == BundleType.duos.value: - assert config.duos_service_url is not None, bundle_fqid - log.info('Bundle %r is a DUOS bundle', bundle_fqid.uuid) - return self._duos_bundle(bundle_fqid) - else: - log.info('Bundle %r is a replica bundle', bundle_fqid.uuid) - return self._replica_bundle(bundle_fqid) - - def _batch_tables(self, - source: TDRSourceSpec, - prefix: str, - ) -> dict[str, tuple[int, int]]: - """ - Find a batch prefix length that yields as close to 256 rows per batch - as possible for each table within the specified partition. The result's - keys are table names and its values are tuples where the first element - is the prefix length (*including* the partition prefix) and the second - element is the resulting number of batches. 
Tables are only included in - the result if they are non-empty and are used to produce batched bundle - formats (i.e. replica and supplementary). - - Because the partitions of a table do not contain exactly the same number - of bundles, calculating the batch size statistics for the entire table - at once produces a different result than performing the same calculation - for any individual partition. We expect the inconsistencies to average - out across partitions so that `count_bundles` and `list_bundles` give - consistent results as long the partition size is substantially larger - than the batch size. - - This method relies on BigQuery's `AVG` function, which is - nondeterministic for floating-point return values. The probability that - this affects this method's return value is very small, but nonzero. - https://cloud.google.com/bigquery/docs/reference/standard-sql/aggregate_functions#avg - """ - assert prefix == prefix.lower(), prefix - max_length = 4 - - def repeat(fmt): - return ', '.join(fmt.format(i=i) for i in range(1, max_length + 1)) - - target_size = 256 - prefix_len = len(prefix) - table_names = self.tdr.list_tables(source) - # This table is present in all snapshots. It is large and contains no - # useful metadata, so we skip indexing replicas from it. - table_names.discard('datarepo_row_ids') - table_names = sorted(filter(BundleType.is_batched, table_names)) - log.info('Calculating batch prefix lengths for partition %r of %d tables ' - 'in source %s', prefix, len(table_names), source) - # The extraneous outer 'SELECT *' works around a bug in BigQuery emulator - # FIXME: BigQuery Emulator rejects valid query - # https://github.com/DataBiosphere/azul/issues/6704 - query = ' UNION ALL '.join(f'''( - SELECT * FROM ( - SELECT - {table_name!r} AS table_name, - {prefix_len} + LENGTH(CONCAT( - {repeat('IFNULL(p{i}, "")')} - )) AS batch_prefix_length, - AVG(num_rows) AS average_batch_size, - COUNT(*) AS num_batches - FROM ( - SELECT - {repeat(f'LOWER(SUBSTR(datarepo_row_id, {prefix_len} + {{i}}, 1)) AS p{{i}}')}, - COUNT(*) AS num_rows - FROM {backtick(self._full_table_name(source, table_name))} - WHERE STARTS_WITH(LOWER(datarepo_row_id), {prefix!r}) - GROUP BY ROLLUP ({repeat('p{i}')}) - ) - GROUP BY batch_prefix_length - ORDER BY ABS({target_size} - average_batch_size) - LIMIT 1 - ) - )''' for table_name in table_names) - - def result(row): - table_name = row['table_name'] - prefix_length = row['batch_prefix_length'] - average_size = row['average_batch_size'] - num_batches = row['num_batches'] - log.info('Selected batch prefix length %d for table %r (average ' - 'batch size %.1f, num batches %d)', - prefix_length, table_name, average_size, num_batches) - return table_name, (prefix_length, num_batches) - - return dict(map(result, self._run_sql(query))) - - def _primary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: - assert not bundle_fqid.is_batched, bundle_fqid - source = bundle_fqid.source - bundle_entity = self._bundle_entity(bundle_fqid) - - keys: MutableKeys = {bundle_entity} - links: KeyLinks = set() - - for method in [self._follow_downstream, self._follow_upstream]: - method: Callable[[TDRSourceSpec, KeysByType], KeyLinks] - n = len(keys) - frontier: Keys = keys - while frontier: - new_links = method(source.spec, self._consolidate_by_type(frontier)) - links.update(new_links) - frontier = frozenset().union(*(link.all_entities for link in new_links)) - keys - keys.update(frontier) - log.debug('Found %r linked entities via method %s', - len(keys) - n, 
method.__name__) - - keys_by_type: KeysByType = self._consolidate_by_type(keys) - if log.isEnabledFor(logging.DEBUG): - arg = keys_by_type - else: - arg = {entity_type: len(keys) for entity_type, keys in keys_by_type.items()} - log.info('Found %i entities linked to bundle %r: %r', - len(keys), bundle_fqid.uuid, arg) - - result = TDRAnvilBundle(fqid=bundle_fqid) - entities_by_key: dict[KeyReference, EntityReference] = {} - for entity_type, typed_keys in sorted(keys_by_type.items()): - pk_column = entity_type.removeprefix('anvil_') + '_id' - rows = self._retrieve_entities(source.spec, entity_type, typed_keys) - if entity_type == 'anvil_donor': - # We expect that the foreign key `part_of_dataset_id` is - # redundant for biosamples and donors. To simplify our queries, - # we do not follow the latter during the graph traversal. - # Here, we validate our expectation. Note that the key is an - # array for biosamples, but not for donors. - dataset_id: Key = one(keys_by_type['anvil_dataset']) - for row in rows: - donor_dataset_id = row['part_of_dataset_id'] - assert donor_dataset_id == dataset_id, R( - 'Conflicting keys', donor_dataset_id, dataset_id) - for row in sorted(rows, key=itemgetter(pk_column)): - key = KeyReference(key=row[pk_column], entity_type=entity_type) - entity = EntityReference(entity_id=row['datarepo_row_id'], - entity_type=entity_type) - entities_by_key[key] = entity - result.add_entity(entity, self._version, row) - result.add_links(link.to_entity_link(entities_by_key) for link in links) - return result - - def _supplementary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: - assert bundle_fqid.is_batched, bundle_fqid - source = bundle_fqid.source.spec - result = TDRAnvilBundle(fqid=bundle_fqid) - linked_file_refs = set() - for file_ref, file_row in self._get_bundle_batch(bundle_fqid): - is_supplementary = file_row['is_supplementary'] - result.add_entity(file_ref, - self._version, - dict(file_row), - is_orphan=not is_supplementary) - if is_supplementary: - linked_file_refs.add(file_ref) - dataset_ref, dataset_row = self._get_dataset(source) - result.add_entity(dataset_ref, self._version, dict(dataset_row)) - # Avoid inserting "degenerate" links with an empty list of outputs, i.e. - # in case of an empty batch (as is common on `anvilbox`). Such links - # would be harmless in production, but would complicate the bundle - # canning integration test. 
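- # As an illustration with assumed data: a batch in which no file row has
- # `is_supplementary` set yields an empty `linked_file_refs`, so no
- # EntityLink(inputs={dataset_ref}, outputs=frozenset()) is emitted at all.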
- if linked_file_refs: - result.add_links([ - EntityLink(inputs=singleton(dataset_ref), - outputs=frozenset(linked_file_refs)) - ]) - return result - - def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: - assert not bundle_fqid.is_batched, bundle_fqid - ref, row = self._get_dataset(bundle_fqid.source.spec) - expected_entity_id = change_version(bundle_fqid.uuid, - self.bundle_uuid_version, - self.datarepo_row_uuid_version) - assert ref.entity_id == expected_entity_id, (ref, bundle_fqid) - bundle = TDRAnvilBundle(fqid=bundle_fqid) - # Classify as orphan to suppress the emission of a contribution - bundle.add_entity(ref, self._version, dict(row), is_orphan=True) - duos_id, duos_info = self.tdr.get_duos(bundle_fqid.source) - if duos_id is not None: - entity_row = { - 'duos_id': duos_id, - 'description': duos_info.get('studyDescription'), - 'dataset_id': row['dataset_id'] - } - bundle.add_entity(ref, self._version, entity_row) - return bundle - - def _replica_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: - assert bundle_fqid.is_batched, bundle_fqid - source = bundle_fqid.source.spec - result = TDRAnvilBundle(fqid=bundle_fqid) - batch = self._get_bundle_batch(bundle_fqid) - dataset = self._get_dataset(source) - for (ref, row) in itertools.chain([dataset], batch): - result.add_entity(ref, self._version, dict(row), is_orphan=True) - return result - - def _get_dataset(self, source: TDRSourceSpec) -> tuple[EntityReference, BigQueryRow]: - table_name = 'anvil_dataset' - columns = self._columns(table_name) - row = one(self._run_sql(f''' - SELECT {', '.join(sorted(columns))} - FROM {backtick(self._full_table_name(source, table_name))} - ''')) - ref = EntityReference(entity_type=table_name, entity_id=row['datarepo_row_id']) - return ref, row - - def _get_batch(self, - source: TDRSourceSpec, - table_name: str, - batch_prefix: str, - *, - key_column: str - ) -> Iterable[tuple[EntityReference, BigQueryRow]]: - columns = self._columns(table_name) - assert not any(map(str.isupper, batch_prefix)), source - for row in self._run_sql(f''' - SELECT {', '.join(sorted(columns))} - FROM {backtick(self._full_table_name(source, table_name))} - WHERE STARTS_WITH(LOWER({key_column}), {batch_prefix!r}) - '''): - ref = EntityReference(entity_type=table_name, entity_id=row['datarepo_row_id']) - yield ref, row - - def _get_bundle_batch(self, - bundle_fqid: TDRAnvilBundleFQID - ) -> Iterable[tuple[EntityReference, BigQueryRow]]: - return self._get_batch(bundle_fqid.source.spec, - bundle_fqid.table_name, - bundle_fqid.batch_prefix, - key_column='datarepo_row_id') - - def _bundle_entity(self, bundle_fqid: TDRAnvilBundleFQID) -> KeyReference: - source = bundle_fqid.source - bundle_uuid = bundle_fqid.uuid - entity_id = uuids.change_version(bundle_uuid, - self.bundle_uuid_version, - self.datarepo_row_uuid_version) - table_name = bundle_fqid.table_name - pk_column = table_name.removeprefix('anvil_') + '_id' - bundle_entity = one(self._run_sql(f''' - SELECT {pk_column} - FROM {backtick(self._full_table_name(source.spec, table_name))} - WHERE datarepo_row_id = '{entity_id}' - '''))[pk_column] - bundle_entity = KeyReference(key=bundle_entity, entity_type=table_name) - log.info('Bundle UUID %r resolved to primary key %r in table %r', - bundle_uuid, bundle_entity.key, table_name) - return bundle_entity - - def _consolidate_by_type(self, entities: Keys) -> MutableKeysByType: - result = { - table['name']: set() - for table in anvil_schema['tables'] - } - for e in entities: - 
result[e.entity_type].add(e.key) - return result - - def _follow_upstream(self, - source: TDRSourceSpec, - entities: KeysByType - ) -> KeyLinks: - return set.union( - self._upstream_from_files(source, entities['anvil_file']), - self._upstream_from_biosamples(source, entities['anvil_biosample']), - # The direction of the edges linking donors to diagnoses is - # contentious. Currently, we model diagnoses as being upstream from - # donors. This is counterintuitive, but has two important practical - # benefits. - # - # First, it greatly simplifies the process of discovering the - # diagnoses while building the bundle, because performing a complete - # *downstream* search with donors as input would be tantamount to - # using donors as bundle entities instead of biosamples, leading to - # increased bundle size and increased overlap between bundles. - # - # Each diagnosis is linked to exactly one other entity (the donor), - # so the direction in which the donor-diagnosis links are followed - # won't affect the discovery of other entities. However, edge - # direction *is* important for deciding which entities in the bundle - # are linked to each other (and thus constitute each other's - # inner/outer entities). This leads to the second and more important - # benefit of our decision to model diagnoses as being upstream from - # donors: it creates continuous directed paths through the graph - # from the diagnoses to all entities downstream of the donor. - # Without such a path, we would be unable to associate biosamples or - # files with diagnoses without adding cumbersome diagnosis-specific - # logic to the transformers' graph traversal algorithm. The only - # entities that are upstream from donors are datasets, which do not - # perform a traversal and are treated as being linked to every - # entity in the bundle regardless of the edges in the graph. 
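- #
- # Schematically, the chosen edge directions yield the continuous path
- #
- #     anvil_diagnosis --> anvil_donor --> anvil_biosample --> anvil_file
- #
- # with datasets sitting outside the traversal, as noted above.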
- self._diagnoses_from_donors(source, entities['anvil_donor']) - ) - - def _follow_downstream(self, - source: TDRSourceSpec, - entities: KeysByType - ) -> KeyLinks: - return set.union( - self._downstream_from_biosamples(source, entities['anvil_biosample']), - self._downstream_from_files(source, entities['anvil_file']) - ) - - def _upstream_from_biosamples(self, - source: TDRSourceSpec, - biosample_ids: AbstractSet[Key] - ) -> KeyLinks: - if biosample_ids: - rows = self._run_sql(f''' - SELECT b.biosample_id, b.donor_id, b.part_of_dataset_id - FROM {backtick(self._full_table_name(source, 'anvil_biosample'))} AS b - WHERE b.biosample_id IN ({', '.join(map(repr, biosample_ids))}) - ''') - result: KeyLinks = set() - for row in rows: - outputs = singleton(KeyReference(entity_type='anvil_biosample', - key=row['biosample_id'])) - inputs = singleton(KeyReference(entity_type='anvil_dataset', - key=one(row['part_of_dataset_id']))) - result.add(KeyLink(outputs=outputs, inputs=inputs)) - for donor_id in row['donor_id']: - inputs = singleton(KeyReference(entity_type='anvil_donor', - key=donor_id)) - result.add(KeyLink(outputs=outputs, inputs=inputs)) - return result - else: - return set() - - def _upstream_from_files(self, - source: TDRSourceSpec, - file_ids: AbstractSet[Key] - ) -> KeyLinks: - if file_ids: - rows = self._run_sql(f''' - WITH file AS ( - SELECT f.file_id FROM {backtick(self._full_table_name(source, 'anvil_file'))} AS f - WHERE f.file_id IN ({', '.join(map(repr, file_ids))}) - ) - SELECT - f.file_id AS generated_file_id, - 'anvil_alignmentactivity' AS activity_table, - ama.alignmentactivity_id AS activity_id, - ama.used_file_id AS uses_file_id, - [] AS uses_biosample_id, - FROM file AS f - JOIN {backtick(self._full_table_name(source, 'anvil_alignmentactivity'))} AS ama - ON f.file_id IN UNNEST(ama.generated_file_id) - UNION ALL SELECT - f.file_id, - 'anvil_assayactivity', - aya.assayactivity_id, - [], - aya.used_biosample_id, - FROM file AS f - JOIN {backtick(self._full_table_name(source, 'anvil_assayactivity'))} AS aya - ON f.file_id IN UNNEST(aya.generated_file_id) - UNION ALL SELECT - f.file_id, - 'anvil_sequencingactivity', - sqa.sequencingactivity_id, - [], - sqa.used_biosample_id, - FROM file AS f - JOIN {backtick(self._full_table_name(source, 'anvil_sequencingactivity'))} AS sqa - ON f.file_id IN UNNEST(sqa.generated_file_id) - UNION ALL SELECT - f.file_id, - 'anvil_variantcallingactivity', - vca.variantcallingactivity_id, - vca.used_file_id, - [] - FROM file AS f - JOIN {backtick(self._full_table_name(source, 'anvil_variantcallingactivity'))} AS vca - ON f.file_id IN UNNEST(vca.generated_file_id) - UNION ALL SELECT - f.file_id, - 'anvil_activity', - a.activity_id, - a.used_file_id, - a.used_biosample_id, - FROM file AS f - JOIN {backtick(self._full_table_name(source, 'anvil_activity'))} AS a - ON f.file_id IN UNNEST(a.generated_file_id) - ''') - return { - KeyLink( - activity=KeyReference(entity_type=row['activity_table'], - key=row['activity_id']), - # The generated link is not a complete representation of the - # upstream activity because it does not include generated files - # that are not ancestors of the downstream file - outputs=singleton( - KeyReference(entity_type='anvil_file', - key=row['generated_file_id'])), - inputs=frozenset( - KeyReference(entity_type=entity_type, - key=key) - for entity_type, column in [ - ('anvil_file', 'uses_file_id'), - ('anvil_biosample', 'uses_biosample_id') - ] - for key in row[column] - ) - ) - for row in rows - } - else: - return set() - - 
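- # A sketch of the links emitted by the method below, with hypothetical
- # keys: each row pairing a donor with a diagnosis becomes
- #
- #     KeyLink(inputs={KeyReference(key='<diagnosis key>', entity_type='anvil_diagnosis')},
- #             outputs={KeyReference(key='<donor key>', entity_type='anvil_donor')},
- #             activity=None)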
def _diagnoses_from_donors(self, - source: TDRSourceSpec, - donor_ids: AbstractSet[Key] - ) -> KeyLinks: - if donor_ids: - rows = self._run_sql(f''' - SELECT dgn.donor_id, dgn.diagnosis_id - FROM {backtick(self._full_table_name(source, 'anvil_diagnosis'))} as dgn - WHERE dgn.donor_id IN ({', '.join(map(repr, donor_ids))}) - ''') - return { - KeyLink( - inputs=singleton( - KeyReference(key=row['diagnosis_id'], - entity_type='anvil_diagnosis')), - outputs=singleton( - KeyReference(key=row['donor_id'], - entity_type='anvil_donor')), - activity=None) - for row in rows - } - else: - return set() - - def _downstream_from_biosamples(self, - source: TDRSourceSpec, - biosample_ids: AbstractSet[Key], - ) -> KeyLinks: - if biosample_ids: - rows = self._run_sql(f''' - WITH activities AS ( - SELECT - sqa.sequencingactivity_id as activity_id, - 'anvil_sequencingactivity' as activity_table, - sqa.used_biosample_id, - sqa.generated_file_id - FROM {backtick(self._full_table_name(source, 'anvil_sequencingactivity'))} AS sqa - UNION ALL - SELECT - aya.assayactivity_id, - 'anvil_assayactivity', - aya.used_biosample_id, - aya.generated_file_id, - FROM {backtick(self._full_table_name(source, 'anvil_assayactivity'))} AS aya - UNION ALL - SELECT - a.activity_id, - 'anvil_activity', - a.used_biosample_id, - a.generated_file_id, - FROM {backtick(self._full_table_name(source, 'anvil_activity'))} AS a - ) - SELECT - biosample_id, - a.activity_id, - a.activity_table, - a.generated_file_id - FROM activities AS a, UNNEST(a.used_biosample_id) AS biosample_id - WHERE biosample_id IN ({', '.join(map(repr, biosample_ids))}) - ''') - return { - KeyLink( - inputs=singleton( - KeyReference(key=row['biosample_id'], - entity_type='anvil_biosample') - ), - outputs=frozenset( - KeyReference(key=output_id, - entity_type='anvil_file') - for output_id in row['generated_file_id'] - ), - activity=KeyReference(key=row['activity_id'], - entity_type=row['activity_table'])) - for row in rows - } - else: - return set() - - def _downstream_from_files(self, - source: TDRSourceSpec, - file_ids: AbstractSet[Key] - ) -> KeyLinks: - if file_ids: - rows = self._run_sql(f''' - WITH activities AS ( - SELECT - ala.alignmentactivity_id AS activity_id, - 'anvil_alignmentactivity' AS activity_table, - ala.used_file_id, - ala.generated_file_id - FROM {backtick(self._full_table_name(source, 'anvil_alignmentactivity'))} AS ala - UNION ALL SELECT - vca.variantcallingactivity_id, - 'anvil_variantcallingactivity', - vca.used_file_id, - vca.generated_file_id - FROM {backtick(self._full_table_name(source, 'anvil_variantcallingactivity'))} AS vca - UNION ALL SELECT - a.activity_id, - 'anvil_activity', - a.used_file_id, - a.generated_file_id - FROM {backtick(self._full_table_name(source, 'anvil_activity'))} AS a - ) - SELECT - used_file_id, - a.generated_file_id, - a.activity_id, - a.activity_table - FROM activities AS a, UNNEST(a.used_file_id) AS used_file_id - WHERE used_file_id IN ({', '.join(map(repr, file_ids))}) - ''') - return { - KeyLink( - inputs=singleton( - KeyReference(key=row['used_file_id'], - entity_type='anvil_file')), - outputs=frozenset( - KeyReference(key=file_id, - entity_type='anvil_file') - for file_id in row['generated_file_id'] - ), - activity=KeyReference(key=row['activity_id'], - entity_type=row['activity_table'])) - for row in rows - } - else: - return set() - - def _retrieve_entities(self, - source: TDRSourceSpec, - entity_type: EntityType, - keys: AbstractSet[Key], - ) -> MutableJSONs: - if keys: - columns = self._columns(entity_type) 
- table_name = self._full_table_name(source, entity_type) - pk_column = entity_type.removeprefix('anvil_') + '_id' - assert pk_column in columns, entity_type - log.debug('Retrieving %i entities of type %r ...', len(keys), entity_type) - rows = self._run_sql(f''' - SELECT {', '.join(sorted(columns))} - FROM {backtick(table_name)} - WHERE {pk_column} IN ({', '.join(map(repr, keys))}) - ''') - - rows = [ - { - k: self.format_version(v) if isinstance(v, datetime.datetime) else v - for k, v in row.items() - } - for row in rows - ] - log.debug('Retrieved %i entities of type %r', len(rows), entity_type) - missing = keys - {row[pk_column] for row in rows} - assert not missing, R( - f'Found only {len(rows)} out of {len(keys)} expected rows in {table_name}. ' - f'Missing entities: {missing}') - return rows - else: - return [] - - _schema_columns = { - table['name']: [column['name'] for column in table['columns']] - for table in anvil_schema['tables'] - } - - def _columns(self, table_name: str) -> set[str]: - try: - columns = self._schema_columns[table_name] - except KeyError: - return {'*'} - else: - columns = set(columns) - columns.add('datarepo_row_id') - return columns diff --git a/src/azul/plugins/repository/tdr_hca/__init__.py b/src/azul/plugins/repository/tdr_hca/__init__.py deleted file mode 100644 index c1541a75bd..0000000000 --- a/src/azul/plugins/repository/tdr_hca/__init__.py +++ /dev/null @@ -1,535 +0,0 @@ -from collections import ( - defaultdict, -) -from concurrent.futures import ( - ThreadPoolExecutor, -) -from itertools import ( - islice, -) -import json -import logging -from operator import ( - itemgetter, -) -from typing import ( - ClassVar, - Iterable, - Self, - cast, -) - -import attr -from furl import ( - furl, -) -from more_itertools import ( - one, -) - -from azul import ( - R, - config, - iif, -) -from azul.bigquery import ( - BigQueryRow, - backtick, -) -from azul.collections import ( - singleton, -) -from azul.drs import ( - RegularDRSURI, -) -from azul.indexer import ( - BundleFQID, -) -from azul.indexer.document import ( - EntityID, - EntityReference, - EntityType, -) -from azul.plugins.metadata.hca import ( - HCAFile, -) -from azul.plugins.metadata.hca.bundle import ( - HCABundle, -) -from azul.plugins.repository.tdr import ( - TDRBundle, - TDRBundleFQID, - TDRPlugin, -) -from azul.strings import ( - single_quote as sq, -) -from azul.terra import ( - TDRSourceRef, - TDRSourceSpec, -) -from azul.types import ( - JSON, - JSONs, - MutableJSON, - MutableJSONs, -) -from humancellatlas.data.metadata import ( - api, -) - -log = logging.getLogger(__name__) - -Entities = set[EntityReference] -EntitiesByType = dict[EntityType, set[EntityID]] - - -@attr.s(frozen=True, auto_attribs=True) -class Links: - project: EntityReference - processes: Entities = attr.Factory(set) - protocols: Entities = attr.Factory(set) - inputs: Entities = attr.Factory(set) - outputs: Entities = attr.Factory(set) - supplementary_files: Entities = attr.Factory(set) - - @classmethod - def from_json(cls, project: EntityReference, links_json: JSON) -> Self: - """ - A `links.json` file, in a more accessible form. - - :param links_json: The contents of a `links.json` file. - - :param project: A reference to the project the given `links.json` - belongs to. 
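-
-        For illustration only (hypothetical IDs and entity types, with the
-        shape implied by the parsing logic below), a minimal `links.json`
-        content might look like::
-
-            {
-                'links': [
-                    {
-                        'link_type': 'process_link',
-                        'process_type': 'process',
-                        'process_id': 'p1',
-                        'inputs': [{'input_type': 'sequence_file', 'input_id': 'f1'}],
-                        'outputs': [{'output_type': 'analysis_file', 'output_id': 'f2'}],
-                        'protocols': []
-                    }
-                ]
-            }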
- """ - self = cls(project) - for link in cast(JSONs, links_json['links']): - link_type = link['link_type'] - if link_type == 'process_link': - self.processes.add(EntityReference(entity_type=link['process_type'], - entity_id=link['process_id'])) - for category in ('input', 'output', 'protocol'): - plural = category + 's' - target = getattr(self, plural) - for entity in cast(JSONs, link[plural]): - target.add(EntityReference(entity_type=entity[category + '_type'], - entity_id=entity[category + '_id'])) - elif link_type == 'supplementary_file_link': - associate = EntityReference(entity_type=link['entity']['entity_type'], - entity_id=link['entity']['entity_id']) - # For MVP, only project entities can have associated supplementary files. - assert associate == project, R( - 'Supplementary file must be associated with the current project', - project, associate) - for entity in cast(JSONs, link['files']): - self.supplementary_files.add( - EntityReference(entity_type='supplementary_file', - entity_id=entity['file_id'])) - else: - assert False, R('Unexpected link_type', link_type) - return self - - def all_entities(self) -> Entities: - return set.union(*(value if isinstance(value, set) else {value} - for field, value in attr.asdict(self, recurse=False).items())) - - def dangling_inputs(self) -> Entities: - return { - input_ - for input_ in self.inputs - if input_.entity_type.endswith('_file') and not ( - input_ in self.outputs or - input_ in self.supplementary_files - ) - } - - -class TDRHCABundle(HCABundle[TDRBundleFQID], TDRBundle): - - @classmethod - def canning_qualifier(cls) -> str: - return super().canning_qualifier() + '.hca' - - def add_entity(self, - *, - entity: EntityReference, - row: BigQueryRow, - is_stitched: bool - ) -> None: - if is_stitched: - self.stitched.add(entity.entity_id) - if entity.entity_type.endswith('_file'): - self._add_manifest_entry(entity, self.file_from_row(row)) - content = row['content'] - self.metadata[str(entity)] = (json.loads(content) - if isinstance(content, str) - else content) - - metadata_columns: ClassVar[frozenset[str]] = singleton( - 'content' - ) - - data_columns: ClassVar[frozenset[str]] = frozenset({ - 'descriptor', - 'JSON_EXTRACT_SCALAR(content, "$.file_core.file_name") AS file_name', - 'file_id' - }) - - # `links_id` is omitted for consistency since the other sets do not include - # the primary key - links_columns: ClassVar[frozenset[str]] = singleton( - 'project_id' - ) - - @classmethod - def file_from_row(cls, row: BigQueryRow) -> HCAFile: - descriptor = json.loads(row['descriptor']) - # FIXME: Move validation of descriptor to the metadata API - # https://github.com/DataBiosphere/azul/issues/6299 - api.Entity.validate_described_by(descriptor) - return HCAFile.from_descriptor(descriptor, - uuid=descriptor['file_id'], - name=row['file_name'], - drs_uri=cls._parse_drs_uri(row['file_id'], descriptor)) - - def _add_manifest_entry(self, - entity: EntityReference, - file: HCAFile) -> None: - file_json = file.to_json() - file_json['content-type'] = file_json.pop('content_type') - file_json['indexed'] = False - self.manifest[str(entity)] = file_json - - @classmethod - def _parse_drs_uri(cls, - file_id: str | None, - descriptor: JSON - ) -> str | None: - if file_id is None: - try: - external_drs_uri = descriptor['drs_uri'] - except KeyError: - assert False, R( - '`file_id` is null and `drs_uri` is not set in file descriptor', - descriptor) - else: - # FIXME: Support non-null DRS URIs in file descriptors - # 
https://github.com/DataBiosphere/azul/issues/3631
-                    if external_drs_uri is not None:
-                        log.warning('Non-null `drs_uri` in file descriptor (%s)', external_drs_uri)
-                        external_drs_uri = None
-                    return external_drs_uri
-        else:
-            # This requirement prevents mismatches in the DRS domain, and ensures
-            # that changes to the column syntax don't go undetected.
-            parsed = RegularDRSURI.parse(file_id)
-            assert parsed.uri.netloc == config.tdr_service_url.netloc, R(
-                'Unexpected DRS URI location', parsed.uri)
-            return file_id
-
-
-class Plugin(TDRPlugin[TDRHCABundle, TDRBundleFQID]):
-
-    def count_bundles(self, source: TDRSourceSpec) -> int:
-        prefix = '' if source.prefix is None else source.prefix.common
-        assert prefix == prefix.lower(), source
-        query = f'''
-            SELECT COUNT(*) AS count
-            FROM {backtick(self._full_table_name(source, 'links'))}
-            WHERE STARTS_WITH(LOWER(datarepo_row_id), {prefix!r})
-        '''
-        rows = self._run_sql(query)
-        return one(rows)['count']
-
-    def count_files(self, source: TDRSourceSpec) -> int:
-        prefix = '' if source.prefix is None else source.prefix.common
-        assert prefix == prefix.lower(), source
-        query = ' UNION ALL '.join(
-            f'''
-            SELECT COUNT(*) AS count
-            FROM {backtick(self._full_table_name(source, entity_type))}
-            WHERE STARTS_WITH(LOWER(JSON_EXTRACT_SCALAR(descriptor, "$.sha256")),
-                              {prefix!r})
-            '''
-            for entity_type, entity_cls in api.entity_types.items()
-            if entity_type.endswith('_file')
-        )
-        rows = self._run_sql(query)
-        return sum(row['count'] for row in rows)
-
-    def list_bundles(self,
-                     source: TDRSourceRef,
-                     prefix: str
-                     ) -> list[TDRBundleFQID]:
-        self._assert_source(source)
-        self._assert_partition(source, prefix)
-        assert prefix == prefix.lower(), source
-        current_bundles = self._query_unique_sorted(f'''
-            SELECT links_id, version
-            FROM {backtick(self._full_table_name(source.spec, 'links'))}
-            WHERE STARTS_WITH(LOWER(links_id), {prefix!r})
-        ''', group_by='links_id')
-        return [
-            TDRBundleFQID(source=source,
-                          uuid=row['links_id'],
-                          version=self.format_version(row['version']))
-            for row in current_bundles
-        ]
-
-    def list_files(self, source: TDRSourceRef, prefix: str) -> list[HCAFile]:
-        self._assert_source(source)
-        self._assert_partition(source, prefix)
-        assert prefix == prefix.lower(), prefix
-        rows = self._run_sql(' UNION ALL '.join(
-            f'''
-            SELECT {', '.join(TDRHCABundle.data_columns)}
-            FROM {backtick(self._full_table_name(source.spec, entity_type))}
-            WHERE STARTS_WITH(LOWER(JSON_EXTRACT_SCALAR(descriptor, "$.sha256")),
-                              {prefix!r})
-            '''
-            for entity_type, entity_cls in api.entity_types.items()
-            if entity_type.endswith('_file')
-        ))
-        return list(map(TDRHCABundle.file_from_row, rows))
-
-    def _query_unique_sorted(self,
-                             query: str,
-                             group_by: str
-                             ) -> list[BigQueryRow]:
-        iter_rows = self._run_sql(query)
-        key = itemgetter(group_by)
-        rows = sorted(iter_rows, key=key)
-        assert len(set(map(key, rows))) == len(rows), R(
-            'Expected unique keys', group_by)
-        return rows
-
-    def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRHCABundle:
-        bundle = TDRHCABundle(fqid=bundle_fqid,
-                              manifest={},
-                              metadata={},
-                              links={})
-        entities, root_entities, links_jsons = self._stitch_bundles(bundle)
-        bundle.links = self._merge_links(links_jsons)
-
-        with ThreadPoolExecutor(max_workers=config.num_tdr_workers) as executor:
-            futures = {
-                entity_type: executor.submit(self._retrieve_entities,
-                                             bundle.fqid.source.spec,
-                                             entity_type,
-                                             entity_ids)
-                for entity_type, entity_ids in entities.items()
-            }
-            for entity_type, future in
futures.items():
-                e = future.exception()
-                if e is None:
-                    rows = future.result()
-                    pk_column = entity_type + '_id'
-                    rows.sort(key=itemgetter(pk_column))
-                    for row in rows:
-                        entity = EntityReference(entity_id=row[pk_column], entity_type=entity_type)
-                        is_stitched = entity not in root_entities
-                        bundle.add_entity(entity=entity,
-                                          row=row,
-                                          is_stitched=is_stitched)
-                else:
-                    log.error('TDR worker failed to retrieve entities of type %r',
-                              entity_type, exc_info=e)
-                    raise e
-        return bundle
-
-    def _stitch_bundles(self,
-                        root_bundle: TDRHCABundle
-                        ) -> tuple[EntitiesByType, Entities, MutableJSONs]:
-        """
-        Recursively follow dangling inputs to collect entities from upstream
-        bundles, ensuring that no bundle is processed more than once.
-        """
-        source = root_bundle.fqid.source
-        entities: EntitiesByType = defaultdict(set)
-        root_entities = None
-        unprocessed: set[TDRBundleFQID] = {root_bundle.fqid}
-        processed: set[TDRBundleFQID] = set()
-        stitched_links: MutableJSONs = []
-        # Retrieving links in batches eliminates the risk of exceeding
-        # BigQuery's maximum query size. Using a batch size of 1000 appears to
-        # be just as performant as retrieving the links without batching.
-        batch_size = 1000
-        while unprocessed:
-            batch = set(islice(unprocessed, batch_size))
-            links = self._retrieve_links(batch)
-            processed.update(batch)
-            unprocessed -= batch
-            stitched_links.extend(links.values())
-            all_dangling_inputs: set[EntityReference] = set()
-            for links_id, links_json in links.items():
-                project = EntityReference(entity_type='project',
-                                          entity_id=links_json['project_id'])
-                parsed_links = Links.from_json(project, links_json['content'])
-                linked_entities = parsed_links.all_entities()
-                dangling_inputs = parsed_links.dangling_inputs()
-                if links_id == root_bundle.fqid:
-                    assert root_entities is None
-                    root_entities = linked_entities - dangling_inputs
-                for entity in linked_entities:
-                    entities[entity.entity_type].add(entity.entity_id)
-                if dangling_inputs:
-                    log.info('There are %i dangling inputs in bundle %r', len(dangling_inputs), links_id)
-                    log.debug('Dangling inputs in bundle %r: %r', links_id, dangling_inputs)
-                    all_dangling_inputs.update(dangling_inputs)
-                else:
-                    log.info('Bundle %r is self-contained', links_id)
-            if all_dangling_inputs:
-                upstream = self._find_upstream_bundles(source, all_dangling_inputs)
-                unprocessed |= upstream - processed
-
-        assert root_entities is not None
-        processed.remove(root_bundle.fqid)
-        if processed:
-            arg = f': {processed!r}' if log.isEnabledFor(logging.DEBUG) else ''
-            log.info('Stitched %i bundle(s)%s', len(processed), arg)
-        return entities, root_entities, stitched_links
-
-    def _retrieve_links(self,
-                        links_ids: set[TDRBundleFQID]
-                        ) -> dict[TDRBundleFQID, MutableJSON]:
-        """
-        Retrieve links entities from BigQuery and parse the `content` column.
-
-        :param links_ids: Which links entities to retrieve.
-        """
-        source = one({fqid.source.spec for fqid in links_ids})
-        links = self._retrieve_entities(source, 'links', links_ids)
-        links = {
-            # Copy the values so we can reassign `content` below
-            fqid: dict(one(links_json
-                           for links_json in links
-                           if links_json['links_id'] == fqid.uuid))
-            for fqid in links_ids
-        }
-        for links_json in links.values():
-            links_json['content'] = json.loads(links_json['content'])
-        return links
-
-    def _retrieve_entities(self,
-                           source: TDRSourceSpec,
-                           entity_type: EntityType,
-                           entity_ids: set[EntityID] | set[BundleFQID],
-                           ) -> list[BigQueryRow]:
-        """
-        Efficiently retrieve multiple entities from BigQuery in a single query.
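-
-        As a rough sketch (the snapshot, table and entity IDs here are
-        hypothetical; actual names come from `_full_table_name` and the column
-        sets defined on `TDRHCABundle`), the query built below for a non-links
-        entity type resembles::
-
-            SELECT content, cell_suspension_id
-            FROM `datarepo.snapshot.cell_suspension`
-            WHERE (cell_suspension_id) IN (('7b077baf-52fb-4ab8-a8a9-0a0d5b87f0d4'))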
- - :param source: Snapshot containing the entity table - - :param entity_type: The type of entity, corresponding to the table name - - :param entity_ids: For links, the fully qualified UUID and version of - each `links` entity. For other entities, just the UUIDs. - """ - pk_column = entity_type + '_id' - version_column = 'version' - columns = { - pk_column, - *TDRHCABundle.metadata_columns, - *iif(entity_type == 'links', TDRHCABundle.links_columns), - *iif(entity_type.endswith('_file'), TDRHCABundle.data_columns) - } - table_name = backtick(self._full_table_name(source, entity_type)) - entity_id_type = one(set(map(type, entity_ids))) - - if entity_type == 'links': - assert issubclass(entity_id_type, BundleFQID), entity_id_type - entity_ids = cast(set[BundleFQID], entity_ids) - where_columns = (pk_column, version_column) - where_values = ( - (sq(fqid.uuid), f'TIMESTAMP({sq(fqid.version)})') - for fqid in entity_ids - ) - expected = {fqid.uuid for fqid in entity_ids} - else: - assert issubclass(entity_id_type, str), (entity_type, entity_id_type) - where_columns = (pk_column,) - where_values = ((sq(str(entity_id)),) for entity_id in entity_ids) - expected = entity_ids - query = f''' - SELECT {', '.join(columns)} - FROM {table_name} - WHERE {self._in(where_columns, where_values)} - ''' - log.debug('Retrieving %i entities of type %r ...', len(entity_ids), entity_type) - rows = self._query_unique_sorted(query, group_by=pk_column) - log.debug('Retrieved %i entities of type %r', len(rows), entity_type) - missing = expected - {row[pk_column] for row in rows} - assert not missing, R( - f'Found only {len(rows)} out of {len(entity_ids)} expected rows in {table_name}. ' - f'Missing entities: {missing}') - return rows - - def _in(self, - columns: tuple[str, ...], - values: Iterable[tuple[str, ...]] - ) -> str: - """ - >>> plugin = Plugin(catalog='') - >>> plugin._in(('foo', 'bar'), [('"abc"', '123'), ('"def"', '456')]) - '(foo, bar) IN (("abc", 123), ("def", 456))' - """ - - def join(i): - return '(' + ', '.join(i) + ')' - - return join(columns) + ' IN ' + join(map(join, values)) - - def _find_upstream_bundles(self, - source: TDRSourceRef, - outputs: Entities) -> set[TDRBundleFQID]: - """ - Search for bundles containing processes that produce the specified output - entities. - """ - output_ids = [output.entity_id for output in outputs] - output_id = 'JSON_EXTRACT_SCALAR(link_output, "$.output_id")' - rows = self._run_sql(f''' - SELECT links_id, version, {output_id} AS output_id - FROM {backtick(self._full_table_name(source.spec, 'links'))} AS links - JOIN UNNEST(JSON_EXTRACT_ARRAY(links.content, '$.links')) AS content_links - ON JSON_EXTRACT_SCALAR(content_links, '$.link_type') = 'process_link' - JOIN UNNEST(JSON_EXTRACT_ARRAY(content_links, '$.outputs')) AS link_output - ON {output_id} IN UNNEST({output_ids}) - ''') - bundles = set() - outputs_found = set() - for row in rows: - bundles.add(TDRBundleFQID(source=source, - uuid=row['links_id'], - version=self.format_version(row['version']))) - outputs_found.add(row['output_id']) - missing = set(output_ids) - outputs_found - assert not missing, R(f'Dangling inputs not found in any bundle: {missing}') - return bundles - - def _merge_links(self, links_jsons: MutableJSONs) -> MutableJSON: - """ - Merge the links.json documents from multiple stitched bundles into a - single document. 
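-
-        In the trivial case of a bundle that required no stitching, the root
-        document's content is returned unchanged (a minimal doctest-style
-        illustration, mirroring the doctest of `_in` above):
-
-        >>> plugin = Plugin(catalog='')
-        >>> plugin._merge_links([{'content': {'links': []}}])
-        {'links': []}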
- """ - root, *stitched = links_jsons - if stitched: - source_contents = [row['content'] for row in links_jsons] - # FIXME: Explicitly verify compatible schema versions for stitched subgraphs - # https://github.com/DataBiosphere/azul/issues/3215 - schema_type = 'links' - schema_version = '3.0.0' - schema_url = furl(url='https://schema.humancellatlas.org', - path=('system', schema_version, schema_type)) - merged_content = { - 'schema_type': schema_type, - 'schema_version': schema_version, - 'describedBy': str(schema_url), - 'links': sum((sc['links'] for sc in source_contents), start=[]) - } - assert merged_content.keys() == one({ - frozenset(sc.keys()) for sc in source_contents - }), merged_content - return merged_content - else: - return root['content'] diff --git a/src/azul/queues.py b/src/azul/queues.py deleted file mode 100644 index 03c2faebd5..0000000000 --- a/src/azul/queues.py +++ /dev/null @@ -1,591 +0,0 @@ -import builtins -from collections import ( - deque, -) -from collections.abc import ( - Iterable, - Mapping, -) -from concurrent.futures import ( - Future, - ThreadPoolExecutor, - as_completed, -) -from datetime import ( - datetime, -) -from enum import ( - Enum, -) -from itertools import ( - chain, - islice, -) -import json -import logging -from math import ( - ceil, -) -import os -import time -from typing import ( - Self, - TYPE_CHECKING, - cast, -) -import uuid - -import attrs -from chalice.app import ( - SQSRecord, -) -import more_itertools -from more_itertools import ( - chunked, - one, -) - -from azul import ( - R, - cached_property, - config, -) -from azul.deployment import ( - aws, -) -from azul.files import ( - write_file_atomically, -) -from azul.json import ( - Serializable, -) -from azul.lambdas import ( - Lambdas, -) -from azul.modules import ( - load_app_module, -) -from azul.types import ( - AnyJSON, - JSON, - json_mapping, - json_str, -) - -log = logging.getLogger(__name__) - -if TYPE_CHECKING: - from mypy_boto3_sqs.type_defs import ( - ChangeMessageVisibilityBatchRequestEntryTypeDef, - SendMessageBatchRequestEntryTypeDef, - SendMessageRequestQueueSendMessageTypeDef, - ) - from mypy_boto3_sqs.service_resource import ( - Message, - Queue, - ) - - -@attrs.frozen(kw_only=True) -class SQSMessage: - body: JSON - - #: Approximate number of times this message has been received, or None if - #: this message was not received from a queue - #: - attempts: int | None = None - - #: The ID of this message in the queue, or None if this message was not - #: received from a queue. 
- #: - id: str | None = None - - def to_entry(self) -> 'SendMessageRequestQueueSendMessageTypeDef': - return {'MessageBody': json.dumps(self.body)} - - def to_batch_entry(self, id: int) -> 'SendMessageBatchRequestEntryTypeDef': - return {**self.to_entry(), 'Id': str(id)} - - @classmethod - def from_record(cls, record: SQSRecord) -> Self: - attributes = json_mapping(record.to_dict()['attributes']) - return cls(id=json_str(record.to_dict()['messageId']), - body=json.loads(record.body), - attempts=int(json_str(attributes['ApproximateReceiveCount']))) - - -@attrs.frozen(kw_only=True) -class SQSFifoMessage(SQSMessage): - group_id: str - dedup_id: str = attrs.field(factory=lambda: str(uuid.uuid4())) - - def to_entry(self) -> 'SendMessageRequestQueueSendMessageTypeDef': - return { - **super().to_entry(), - 'MessageGroupId': self.group_id, - 'MessageDeduplicationId': self.dedup_id - } - - @classmethod - def from_record(cls, record: SQSRecord) -> Self: - attributes = json_mapping(record.to_dict()['attributes']) - return cls(id=json_str(record.to_dict()['messageId']), - body=json.loads(record.body), - attempts=int(json_str(attributes['ApproximateReceiveCount'])), - group_id=json_str(attributes['MessageGroupId']), - dedup_id=json_str(attributes['MessageDeduplicationId'])) - - -class Queues: - #: The number of messages to be queued in a single SQS SendMessageBatch - #: action. Theoretically, larger batches are better but SQS currently limits - #: the SendMessageBatch size to 10. This is also used to configure the - #: number of SQS messages that Lambda delivers to a function bound to a - #: queue. Lambda can deliver at most 10 FIFO messages or 10,000 standard - #: messages. - #: - batch_size = 10 - - def __init__(self, delete: bool = False, json_body: bool = True): - self._delete = delete - self._json_body = json_body - - def list(self): - log.info('Listing queues') - print(f'\n{"Queue Name":<35s}' - f'{"Messages Available":^20s}' - f'{"Messages In Flight":^20s}' - f'{"Messages Delayed":^18s}\n') - queues = self.all_queues() - for queue_name, queue in queues.items(): - print(f'{queue_name:<35s}' - f'{queue.attributes["ApproximateNumberOfMessages"]:^20s}' - f'{queue.attributes["ApproximateNumberOfMessagesNotVisible"]:^20s}' - f'{queue.attributes["ApproximateNumberOfMessagesDelayed"]:^18s}') - - def dump(self, queue_name: str, path: str): - queue = aws.sqs_queue(queue_name) - self._dump(queue, path) - - def dump_all(self): - for queue_name, queue in self.all_queues().items(): - self._dump(queue, queue_name + '.json') - - def _dump(self, queue: 'Queue', path: str): - log.info('Writing messages from queue %r to file %r', queue.url, path) - messages = self._get_messages(queue) - self._dump_messages(messages, queue.url, path) - log.info(f'Finished writing {path!r}') - self._cleanup_messages(queue, messages) - - def _get_messages(self, queue: 'Queue') -> builtins.list['Message']: - messages: list['Message'] = [] - while True: - message_batch = queue.receive_messages(AttributeNames=['All'], - MaxNumberOfMessages=10, - VisibilityTimeout=300) - if not message_batch: # Nothing left in queue - return messages - else: - messages.extend(message_batch) - - def read_messages(self, queue: 'Queue') -> builtins.list['Message']: - messages = self._get_messages(queue) - self._cleanup_messages(queue, messages) - return messages - - def send_messages(self, queue: 'Queue', messages: Iterable[SQSMessage]) -> int: - num_messages = 0 - for batch in chunked(messages, self.batch_size): - entries = [message.to_batch_entry(i) for 
i, message in enumerate(batch)] - queue.send_messages(Entries=entries) - num_messages += len(batch) - return num_messages - - def send_message(self, queue: 'Queue', message: SQSMessage): - queue.send_message(**message.to_entry()) - - def _cleanup_messages(self, queue: 'Queue', messages: Iterable['Message']): - message_batches = list(more_itertools.chunked(messages, self.batch_size)) - if self._delete: - log.info('Removing messages from queue %r', queue.url) - self._delete_messages(message_batches, queue) - else: - log.info('Returning messages to queue %r', queue.url) - self._return_messages(message_batches, queue) - - def _dump_messages(self, - messages: Iterable['Message'], - queue_url: str, - path: str): - messages = [self._condense(message) for message in messages] - with write_file_atomically(path) as file: - content = { - 'queue': queue_url, - 'messages': messages - } - json.dump(content, file, indent=4) - log.info('Wrote %i messages', len(messages)) - - def _return_messages(self, - message_batches: Iterable[Iterable['Message']], - queue: 'Queue'): - for message_batch in message_batches: - batch: list['ChangeMessageVisibilityBatchRequestEntryTypeDef'] = [ - dict(Id=message.message_id, - ReceiptHandle=message.receipt_handle, - VisibilityTimeout=0) - for message in message_batch - ] - response = queue.change_message_visibility_batch(Entries=batch) - if len(response['Successful']) != len(batch): - raise RuntimeError(f'Failed to return message: {response!r}') - - def _delete_messages(self, - message_batches: Iterable[builtins.list['Message']], - queue: 'Queue'): - for message_batch in message_batches: - response = queue.delete_messages( - Entries=[dict(Id=message.message_id, - ReceiptHandle=message.receipt_handle) for message in message_batch]) - if len(response['Successful']) != len(message_batch): - raise RuntimeError(f'Failed to delete messages: {response!r}') - - def _condense(self, message: 'Message') -> JSON: - """ - Prepare a message for writing to a local file. - """ - # The cast is needed because the type stub for `Message` misuses `typing.Literal` - attributes = cast(dict[str, str], message.attributes) - return { - 'MessageId': message.message_id, - 'ReceiptHandle': message.receipt_handle, - 'MD5OfBody': message.md5_of_body, - 'Body': json.loads(message.body) if self._json_body else message.body, - 'Attributes': json_mapping(attributes), - '_Attributes': { - k: datetime.fromtimestamp(int(json_str(attributes[k])) / 1000).astimezone().isoformat() - for k in ('SentTimestamp', 'ApproximateFirstReceiveTimestamp') - } - } - - def _reconstitute(self, message: JSON) -> 'SendMessageBatchRequestEntryTypeDef': - """ - Prepare a message from a local file for submission to a queue. - - The inverse of _condense(). 
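-
-        As a sketch (hypothetical values), a condensed message such as::
-
-            {'MessageId': 'm-1', 'Body': {'action': 'reindex'}, 'Attributes': {}}
-
-        is reconstituted into the batch entry::
-
-            {'Id': 'm-1', 'MessageBody': '{"action": "reindex"}'}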
-        """
-        body = message['Body']
-        if not isinstance(body, str):
-            body = json.dumps(body)
-        attributes = json_mapping(message['Attributes'])
-        result: 'SendMessageBatchRequestEntryTypeDef' = {
-            'Id': json_str(message['MessageId']),
-            'MessageBody': body,
-        }
-        for key in ('MessageGroupId', 'MessageDeduplicationId'):
-            try:
-                result[key] = json_str(attributes[key])
-            except KeyError:
-                pass
-        return result
-
-    def all_queues(self) -> dict[str, 'Queue']:
-        return self.get_queues(config.all_queue_names)
-
-    def get_queues(self, queue_names: Iterable[str]) -> dict[str, 'Queue']:
-        return {
-            queue_name: aws.sqs_queue(queue_name)
-            for queue_name in queue_names
-        }
-
-    def get_queue_lengths(self,
-                          queues: Mapping[str, 'Queue']
-                          ) -> tuple[int, dict[str, int]]:
-        """
-        Count the number of messages in the given queues.
-
-        :param queues: A dictionary of Boto3 queue resources by name.
-
-        :return: A tuple of the total number of messages in all queues and a
-                 dictionary mapping each queue's name to the number of messages
-                 in that queue.
-        """
-        total, lengths = 0, {}
-        for queue_name, queue in queues.items():
-            queue.reload()
-            message_counts = [
-                int(queue.attributes['ApproximateNumberOfMessages']),
-                int(queue.attributes['ApproximateNumberOfMessagesNotVisible']),
-                int(queue.attributes['ApproximateNumberOfMessagesDelayed']),
-            ]
-            length = sum(message_counts)
-            log.debug('Queue %s has %i message(s) (%i available, %i in flight and %i delayed).',
-                      queue_name, length, *message_counts)
-            total += length
-            lengths[queue_name] = length
-        return total, lengths
-
-    def wait_to_stabilize(self,
-                          queue_names: Iterable[str],
-                          timeout: int,
-                          *,
-                          detect_stall: bool
-                          ) -> int:
-        """
-        Wait for queues to reach a steady state.
-
-        :param queue_names: Which queues to wait for.
-
-        :param timeout: The highest timeout among lambda functions receiving
-                        messages from the queues.
-
-        :param detect_stall: If True, the method will raise an exception if
-                             there is no observable progress while the queues
-                             are nonempty. Otherwise, the method will wait
-                             indefinitely for the queues to empty.
-
-        :return: The total final length of the stabilized queues. The only
-                 value this method can actually return is zero; any other
-                 final length raises an exception instead of returning.
-        """
-        sleep_time = 10
-        queues = self.get_queues(queue_names)
-        maxlen = ceil(timeout / sleep_time)
-        total_lengths: deque[int] = deque(maxlen=maxlen)
-        # Two minutes to safely accommodate SQS eventual consistency window of
-        # one minute.
For more info, read WARNING section on - # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sqs.html#SQS.Client.get_queue_attributes - assert maxlen * sleep_time >= 2 * 60 - - while True: - # Determine queue lengths - total_length, queue_lengths = self.get_queue_lengths(queues) - total_lengths.append(total_length) - log.info('Counting %i messages in %i queues.', - total_length, len(queue_lengths)) - log.info('Message count history (most recent first) is %r.', - list(reversed(total_lengths))) - - min_num_zeros = 60 // sleep_time - assert min_num_zeros <= maxlen, min_num_zeros - num_total_lengths = len(total_lengths) - if num_total_lengths >= min_num_zeros: - if not any(islice(reversed(total_lengths), min_num_zeros)): - final_length = total_lengths[-1] - log.info('The queues have emptied.') - break - elif detect_stall and num_total_lengths == total_lengths.maxlen: - cummdiff = sum( - abs(first - second) - for first, second in more_itertools.pairwise(total_lengths) - ) - if cummdiff == 0: - final_length = total_lengths[-1] - log.info('The queues have stabilized.') - break - - log.info('Waiting for %s queue(s) to stabilize ...', len(queues)) - time.sleep(sleep_time) - - if final_length != 0: - raise Exception('The queues have stalled', final_length) - return final_length - - def feed(self, path: str, queue_name: str, force: bool = False): - with open(path) as file: - content = json.load(file) - orig_queue = content['queue'] - messages = content['messages'] - queue = aws.sqs_queue(queue_name) - log.info('Writing messages from file %r to queue %r', path, queue.url) - if orig_queue != queue.url: - if force: - log.warning('Messages originating from queue %r are being fed into queue %r', - orig_queue, queue.url) - else: - raise RuntimeError(f'Cannot feed messages originating from {orig_queue!r} to {queue.url!r}. 
' f'Use --force to override.')
-        message_batches = list(more_itertools.chunked(messages, self.batch_size))
-
-        def _cleanup():
-            if self._delete:
-                remaining_messages = list(chain.from_iterable(message_batches))
-                if len(remaining_messages) < len(messages):
-                    self._dump_messages(messages, orig_queue, path)
-                else:
-                    assert len(remaining_messages) == len(messages)
-                    log.info('No messages were submitted, not touching local file %r', path)
-
-        while message_batches:
-            message_batch = message_batches[0]
-            entries = [self._reconstitute(message) for message in message_batch]
-            try:
-                queue.send_messages(Entries=entries)
-            except BaseException:
-                assert message_batches
-                _cleanup()
-                raise
-            message_batches.pop(0)
-
-        if self._delete:
-            if message_batches:
-                _cleanup()
-            else:
-                log.info('All messages were submitted, removing local file %r', path)
-                os.unlink(path)
-
-    def purge(self, queue_name: str):
-        queues = self.get_queues([queue_name])
-        self.purge_queues_safely(queues)
-
-    def purge_all(self):
-        self.purge_queues_safely(self.all_queues())
-
-    def purge_indexer(self):
-        queues = self.get_queues(config.indexer_work_queue_names)
-        self.purge_queues_safely(queues)
-
-    def purge_mirror(self):
-        queues = self.get_queues(config.mirror_work_queue_names)
-        self.purge_queues_safely(queues)
-
-    def purge_queues_safely(self, queues: Mapping[str, 'Queue']):
-        self.manage_lambdas(queues, enable=False)
-        self.purge_queues_unsafely(queues)
-        self.manage_lambdas(queues, enable=True)
-
-    def purge_queues_unsafely(self, queues: Mapping[str, 'Queue']):
-        with ThreadPoolExecutor(max_workers=len(queues)) as tpe:
-            futures = [tpe.submit(self._purge_queue, queue) for queue in queues.values()]
-            self._handle_futures(futures)
-
-    def _purge_queue(self, queue: 'Queue'):
-        log.info('Purging queue %r', queue.url)
-        queue.purge()
-        self._wait_for_queue_empty(queue)
-
-    def _wait_for_queue_idle(self, queue: 'Queue'):
-        while True:
-            num_inflight_messages = int(queue.attributes['ApproximateNumberOfMessagesNotVisible'])
-            if num_inflight_messages == 0:
-                break
-            log.info('Queue %r has %i in-flight messages', queue.url, num_inflight_messages)
-            time.sleep(3)
-            queue.reload()
-
-    def _wait_for_queue_empty(self, queue: 'Queue'):
-        while True:
-            num_messages = (
-                int(queue.attributes['ApproximateNumberOfMessages']) +
-                int(queue.attributes['ApproximateNumberOfMessagesDelayed']) +
-                int(queue.attributes['ApproximateNumberOfMessagesNotVisible'])
-            )
-            if num_messages == 0:
-                break
-            log.info('Queue %r still has %i messages', queue.url, num_messages)
-            time.sleep(3)
-            queue.reload()
-
-    def _manage_sqs_push(self, function_name: str, queue: 'Queue', enable: bool):
-        lambda_ = aws.lambda_
-        response = lambda_.list_event_source_mappings(FunctionName=function_name,
-                                                      EventSourceArn=queue.attributes['QueueArn'])
-        mapping_uuid = one(response['EventSourceMappings'])['UUID']
-
-        def update_():
-            log.info('%s push from %r to lambda function %r',
-                     'Enabling' if enable else 'Disabling', queue.url, function_name)
-            lambda_.update_event_source_mapping(UUID=mapping_uuid, Enabled=enable)
-
-        state = one(response['EventSourceMappings'])['State']
-        while True:
-            log.info('Push from %r to lambda function %r is in state %r.',
-                     queue.url, function_name, state)
-            if state in ('Disabling', 'Enabling', 'Updating'):
-                pass
-            elif state == 'Enabled':
-                if enable:
-                    break
-                else:
-                    update_()
-            elif state == 'Disabled':
-                if enable:
-                    update_()
-                else:
-                    break
-            else:
-                raise NotImplementedError(state)
-            time.sleep(3)
-            state =
lambda_.get_event_source_mapping(UUID=mapping_uuid)['State'] - - def functions_by_queue(self) -> dict[str, str]: - """ - Returns a dictionary that maps queues to the Lambda function triggered - by the queue. The keys and values are fully qualified resource names. - """ - indexer = load_app_module('indexer') - functions_by_queue = { - handler.queue: config.indexer_function_name(handler.name) - for handler in indexer.app.handler_map.values() - if hasattr(handler, 'queue') - } - invalid_queues = functions_by_queue.keys() - set(config.all_queue_names) - assert not invalid_queues, invalid_queues - return functions_by_queue - - def manage_lambdas(self, queues: Mapping[str, 'Queue'], enable: bool): - """ - Enable or disable the readers and writers of the given queues. - """ - functions_by_queue = self.functions_by_queue() - - with ThreadPoolExecutor(max_workers=len(queues)) as tpe: - futures = [] - - def submit(f, *args, **kwargs): - futures.append(tpe.submit(f, *args, **kwargs)) - - for queue_name, queue in queues.items(): - try: - function = functions_by_queue[queue_name] - except KeyError: - assert queue_name in config.fail_queue_names - else: - if queue_name == config.notifications_queue.name: - # Prevent new notifications from being added - submit(self._manage_lambda, config.indexer_name, enable) - submit(self._manage_sqs_push, function, queue, enable) - self._handle_futures(futures) - futures = [tpe.submit(self._wait_for_queue_idle, queue) for queue in queues.values()] - self._handle_futures(futures) - - def _manage_lambda(self, function_name: str, enable: bool): - self._lambdas.manage_lambda(function_name, enable) - - @cached_property - def _lambdas(self) -> Lambdas: - return Lambdas() - - def _handle_futures(self, futures: Iterable[Future]): - errors = [] - for future in as_completed(futures): - e = future.exception() - if e: - errors.append(e) - log.error('Exception in worker thread', exc_info=e) - if errors: - raise RuntimeError(errors) - - -class Action(Serializable, Enum): - - @classmethod - def from_json(cls, action: AnyJSON) -> Self: - assert isinstance(action, str), R('Action is not a string', type(action)) - try: - return cls[action] - except KeyError: - assert False, R('Invalid action', action) - - def to_json(self) -> str: - return self.name diff --git a/src/azul/schemas.py b/src/azul/schemas.py deleted file mode 100644 index ca47889b0b..0000000000 --- a/src/azul/schemas.py +++ /dev/null @@ -1,85 +0,0 @@ -import json -from typing import ( - Any, -) - -from azul import ( - JSON, - format_description as fd, - mutable_furl, -) -from azul.chalice import ( - AppController, -) -from azul.openapi import ( - params, - responses, - schema, -) - - -class SchemaController(AppController): - """ - A controller for serving JSON schemas relating to an Azul facility - """ - schema_url_path = '/schemas/{facility}/{schema_name}/{version_and_extension}' - - def schema_url(self, - *, - facility: str, - schema_name: str, - version: int - ) -> mutable_furl: - path = self.schema_url_path.format(facility=facility, - schema_name=schema_name, - version_and_extension=f'v{version}.json') - return self.app.base_url.set(path=path) - - def handlers(self) -> dict[str, Any]: - """ - Chalice routes and application handlers to be injected into the global - scope of a Chalice application module. 
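-
-        For illustration (hypothetical facility and schema names): with
-        `facility='repository'`, `schema_name='manifest'` and `version=1`,
-        `schema_url` above yields a URL whose path is
-        `/schemas/repository/manifest/v1.json`, and a GET request to that
-        path is dispatched to the `get_schema` handler defined below.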
- """ - - @self.app.route( - self.schema_url_path, - methods=['GET'], - cors=True, - spec={ - 'summary': 'Retrieve JSON schemas', - 'tags': ['Auxiliary'], - 'parameters': [ - params.path('facility', str), - params.path('schema_name', str), - params.path('version_and_extension', schema.pattern(r'v\d+\.json')), - ], - 'description': fd( - ''' - [JSON Schemas](https://json-schema.org/docs) for various Azul facilities. - ''' - ), - 'responses': { - '200': { - 'description': 'Contents of the schema', - **responses.json_content( - schema.object( - schema=str, - id=str, - type=str, - additionalProperties=True - ) - ) - } - } - } - ) - def get_schema(facility: str, - schema_name: str, - version_and_extension: str - ) -> JSON: - path = 'schemas', facility, schema_name, version_and_extension - schema = json.loads(self.app.load_static_resource(*path)) - schema['$id'] = str(self.app.self_url) - return schema - - return locals() diff --git a/src/azul/service/__init__.py b/src/azul/service/__init__.py deleted file mode 100644 index f9470a08d7..0000000000 --- a/src/azul/service/__init__.py +++ /dev/null @@ -1,172 +0,0 @@ -from collections.abc import ( - Mapping, - Sequence, -) -import logging -from typing import ( - Protocol, - Self, - TypedDict, -) - -import attr -from chalice import ( - ForbiddenError, -) - -from azul import ( - CatalogName, - mutable_furl, -) -from azul.json import ( - copy_json, -) -from azul.plugins import ( - MetadataPlugin, -) -from azul.types import ( - FlatJSON, - JSON, - PrimitiveJSON, -) - -log = logging.getLogger(__name__) - -# We can't express that these are actually pairs. We could, using tuples, but -# those are not JSON, generally speaking, even though the `json` module supports -# serializing them by default. -FilterRange = Sequence[int] | Sequence[float] | Sequence[str] - -# `is` is a reserved keyword so we can't use the class-based syntax for -# TypedDict, but have to use the constructor-based one instead. We don't -# currently represent the mutual exclusivity of the operators. We could, as a -# union of singleton TypeDict subclasses, but PyCharm doesn't support that. -# -FilterOperator = TypedDict( - 'FilterOperator', - { - 'is': list[PrimitiveJSON | FlatJSON], - 'is_not': list[PrimitiveJSON | FlatJSON], - 'intersects': Sequence[FilterRange], - 'contains': Sequence[FilterRange | int | float | str], - 'within': Sequence[FilterRange], - }, - total=False -) - -FiltersJSON = Mapping[str, FilterOperator] - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True) -class Filters: - explicit: FiltersJSON - source_ids: set[str] - - @classmethod - def from_json(cls, json: JSON) -> Self: - """ - Deserialize an instance of this class without reifying it. - """ - return cls(explicit=json['explicit'], - source_ids=set(json['source_ids'])) - - def to_json(self) -> JSON: - """ - The inverse of :py:meth:`from_json`. - """ - return { - 'explicit': self.explicit, - 'source_ids': sorted(self.source_ids) - } - - def update(self, filters: FiltersJSON) -> Self: - return attr.evolve(self, explicit={**self.explicit, **filters}) - - def reify(self, - plugin: MetadataPlugin, - *, - limit_access: bool = True - ) -> FiltersJSON: - """ - Combine the explicit filters passed in by clients with the implicit ones - representing additional restrictions such as which sources are - accessible to clients. 
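-
-        As a sketch (hypothetical facet name and source IDs): reifying
-        `Filters(explicit={'genusSpecies': {'is': ['Homo sapiens']}},
-        source_ids={'a', 'b'})` with `limit_access=True` and no explicit
-        source filter yields the explicit filter plus an implicit
-        `{<source ID field>: {'is': ['a', 'b']}}` entry.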
- - :param plugin: Metadata plugin for the current request's catalog - - :param limit_access: Whether to enforce data access controls by - inserting an implicit filter on the source ID facet - """ - filters = copy_json(self.explicit) - special_fields = plugin.special_fields - - def extract_filter(field: str, *, default: set | None) -> set | None: - filter = filters.pop(field, {}) - # Other operators are not supported on string or boolean fields - assert filter.keys() <= {'is'}, filter - try: - values = filter['is'] - except KeyError: - return default - else: - return set(values) - - explicit_sources = extract_filter(special_fields.source_id, default=None) - accessible = extract_filter(special_fields.accessible, default={False, True}) - source_relation = 'is' - - if limit_access: - if explicit_sources is None: - sources = self.source_ids if True in accessible else [] - else: - forbidden_sources = explicit_sources - self.source_ids - if forbidden_sources: - raise ForbiddenError('Cannot filter by inaccessible sources', - forbidden_sources) - else: - sources = explicit_sources if True in accessible else [] - else: - if accessible == set(): - sources = [] - elif accessible == {False, True}: - sources = explicit_sources - elif accessible == {True}: - if explicit_sources is None: - sources = self.source_ids - else: - sources = self.source_ids & explicit_sources - elif accessible == {False}: - if explicit_sources is None: - sources = self.source_ids - source_relation = 'is_not' - else: - sources = explicit_sources - self.source_ids - else: - assert False, accessible - - if sources is None: - assert limit_access is False, limit_access - else: - filters[special_fields.source_id] = {source_relation: sorted(sources)} - - if limit_access: - assert set(filters[special_fields.source_id]['is']) <= self.source_ids - - return filters - - -class BadArgumentException(Exception): - - def __init__(self, message): - super().__init__(message) - - -class FileUrlFunc(Protocol): - - def __call__(self, - *, - catalog: CatalogName, - file_uuid: str, - fetch: bool = True, - **params: str - ) -> mutable_furl: ... diff --git a/src/azul/service/app_controller.py b/src/azul/service/app_controller.py deleted file mode 100644 index da3057a210..0000000000 --- a/src/azul/service/app_controller.py +++ /dev/null @@ -1,153 +0,0 @@ -import json -from typing import ( - Any, - Callable, - Mapping, -) - -import attr -from chalice import ( - BadRequestError as BRE, - NotFoundError, -) - -from azul import ( - R, - RequirementError, - config, -) -from azul.chalice import ( - AppController, -) -from azul.service import ( - FileUrlFunc, - FiltersJSON, -) -from azul.strings import ( - pluralize, -) - - -@attr.s(auto_attribs=True, frozen=True, kw_only=True) -class ServiceAppController(AppController): - file_url_func: FileUrlFunc - - def _parse_filters(self, filters: str | None) -> FiltersJSON: - """ - Parses a string with Azul filters in JSON syntax. Handles default cases - where filters are None or '{}'. - """ - if filters is None: - return {} - else: - return json.loads(filters) - - -def validate_catalog(catalog): - try: - config.Catalog.validate_name(catalog) - except AssertionError as e: - if R.caused(e): - raise R.propagate(e, BRE) - else: - raise - else: - if catalog not in config.catalogs: - raise NotFoundError(f'Catalog name {catalog!r} does not exist. ' - f'Must be one of {set(config.catalogs)}.') - - -class Mandatory: - """ - Validation wrapper signifying that a parameter is mandatory. 
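-
-    For example, `validate_params(query_params, catalog=Mandatory(str))`
-    rejects a request that omits the `catalog` parameter; see the doctests
-    of `validate_params` below.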
- """ - - def __init__(self, validator: Callable) -> None: - super().__init__() - self._validator = validator - - def __call__(self, param): - return self._validator(param) - - -def validate_params(query_params: Mapping[str, str], - allow_extra_params: bool = False, - **validators: Callable[[Any], Any]) -> None: - """ - Validates request query parameters for web-service API. - - :param query_params: the parameters to be validated - - :param allow_extra_params: - - When False, only parameters specified via '**validators' are accepted, - and validation fails if additional parameters are present. When True, - additional parameters are allowed but their value is not validated. - - :param validators: - - A dictionary mapping the name of a parameter to a function that will be - used to validate the parameter if it is provided. The callable will be - called with a single argument, the parameter value to be validated, and - is expected to raise ValueError, TypeError or azul.RequirementError if - the value is invalid. Only these exceptions will yield a 4xx status - response, all other exceptions will yield a 500 status response. If the - validator is an instance of `Mandatory`, then validation will fail if - its corresponding parameter is not provided. - - >>> validate_params({'order': 'asc'}, order=str) - - >>> validate_params({'size': 'foo'}, size=int) - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Invalid value for `size` - - >>> validate_params({'order': 'asc', 'foo': 'bar'}, order=str) - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Unknown query parameter `foo` - - >>> validate_params({'order': 'asc', 'foo': 'bar'}, order=str, allow_extra_params=True) - - >>> validate_params({}, foo=str) - - >>> validate_params({}, foo=Mandatory(str)) - Traceback (most recent call last): - ... 
- chalice.app.BadRequestError: Missing required query parameter `foo` - - """ - - def fmt_error(err_description, params): - # Sorting is to produce a deterministic error message - joined = ', '.join(f'`{p}`' for p in sorted(params)) - return f'{err_description} {pluralize("query parameter", len(params))} {joined}' - - provided_params = query_params.keys() - validation_params = validators.keys() - mandatory_params = { - param_name - for param_name, validator in validators.items() - if isinstance(validator, Mandatory) - } - - if not allow_extra_params: - extra_params = provided_params - validation_params - if extra_params: - raise BRE(fmt_error('Unknown', extra_params)) - - if mandatory_params: - missing_params = mandatory_params - provided_params - if missing_params: - raise BRE(fmt_error('Missing required', missing_params)) - - for param_name, validator in validators.items(): - try: - param_value = query_params[param_name] - except KeyError: - pass - else: - try: - validator(param_value) - except (TypeError, ValueError, RequirementError): - raise BRE(f'Invalid value for `{param_name}`') diff --git a/src/azul/service/async_manifest_service.py b/src/azul/service/async_manifest_service.py deleted file mode 100644 index 6d4dbac3e8..0000000000 --- a/src/azul/service/async_manifest_service.py +++ /dev/null @@ -1,230 +0,0 @@ -import base64 -import json -import logging -from typing import ( - Self, - TypedDict, -) -from uuid import ( - UUID, -) - -import attrs -import msgpack - -from azul import ( - config, -) -from azul.attrs import ( - strict_auto, -) -from azul.deployment import ( - aws, -) -from azul.types import ( - JSON, -) - -log = logging.getLogger(__name__) - - -@attrs.frozen -class InvalidTokenError(Exception): - value: str = strict_auto() - - -@attrs.frozen(kw_only=True) -class Token: - """ - Represents a Step Function execution to generate a manifest - """ - #: A hash of the inputs - generation_id: UUID = strict_auto() - - #: Number of prior executions for the generation represented by this token - iteration: int = strict_auto() - - #: The number of times the service received a request to inspect the - #: status of the execution represented by this token - request_index: int = strict_auto() - - #: How long clients should wait before requesting a status update about the - #: execution represented by the token - retry_after: int = strict_auto() - - @property - def execution_id(self) -> tuple[UUID, int]: - return self.generation_id, self.iteration - - def pack(self) -> bytes: - return msgpack.packb([ - self.generation_id.bytes, - self.iteration, - self.request_index, - self.retry_after - ]) - - @classmethod - def unpack(cls, pack: bytes) -> Self: - i = iter(msgpack.unpackb(pack)) - return cls(generation_id=UUID(bytes=next(i)), - iteration=next(i), - request_index=next(i), - retry_after=next(i)) - - def encode(self) -> str: - return base64.urlsafe_b64encode(self.pack()).decode() - - @classmethod - def decode(cls, token: str) -> Self: - try: - return cls.unpack(base64.urlsafe_b64decode(token)) - except Exception as e: - raise InvalidTokenError(token) from e - - @classmethod - def first(cls, generation_id: UUID, iteration: int) -> Self: - return cls(generation_id=generation_id, - iteration=iteration, - request_index=0, - retry_after=cls._next_retry_after(0)) - - def next(self, *, retry_after: int | None = None) -> Self: - if retry_after is None: - retry_after = self._next_retry_after(self.request_index) - return attrs.evolve(self, - retry_after=retry_after, - request_index=self.request_index 
+ 1) - - @classmethod - def _next_retry_after(cls, request_index: int) -> int: - delays = [1, 1, 4, 6, 10] - try: - return delays[request_index] - except IndexError: - return delays[-1] - - -class ExecutionResult(TypedDict): - input: JSON - output: JSON - - -@attrs.frozen -class NoSuchGeneration(Exception): - token: Token = strict_auto() - - -@attrs.frozen -class GenerationFinished(Exception): - token: Token = strict_auto() - - -@attrs.frozen(kw_only=True) -class GenerationFailed(Exception): - status: str = strict_auto() - output: str | None = strict_auto() - - -@attrs.frozen -class InvalidGeneration(Exception): - token: Token = strict_auto() - - -class AsyncManifestService: - """ - Starting and checking the status of manifest generation jobs. - """ - - @property - def machine_name(self): - return config.qualified_resource_name(config.manifest_sfn) - - def start_generation(self, - generation_id: UUID, - input: JSON, - iteration: int - ) -> Token: - execution_name = self.execution_name(generation_id, iteration) - execution_arn = self.execution_arn(execution_name) - # The input contains the verbatim manifest key as JSON while the ARN - # contains the encoded hash of the key so this log line is useful for - # associating the hash with the key for diagnostic purposes. - log.info('Starting execution %r for input %r', execution_arn, input) - token = Token.first(generation_id, iteration) - try: - # If there already is an execution of the given name, and if that - # execution is still ongoing and was given the same input as what we - # pass here, `start_execution` will succeed idempotently. - execution = self._sfn.start_execution(stateMachineArn=self.machine_arn, - name=execution_name, - input=json.dumps(input)) - except self._sfn.exceptions.ExecutionAlreadyExists: - # This exception indicates that there is already an execution with - # the given name but that it has ended, or that its input differs - # from what we were passing just now. The latter case is unexpected - # because any part of the input that affects the output is covered - # in the manifest hash and therefore the execution name. Any part of - # the input not affecting the output is constant and can only change - # with the source code which would have resulted in a different - # execution name. - # - # In the former case we return the token so that the client has to - # make another request to actually obtain the resulting manifest. - # Strictly speaking, we could return the manifest here, but it keeps - # the control flow simpler. This benevolent race is not probable - # enough to warrant an optimization. 
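-                # To paraphrase the two branches below: if the recorded input
-                # matches ours, the execution merely finished earlier and the
-                # client is told to request the result again; if it differs,
-                # the name collision is unexplainable and is surfaced as an
-                # error.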
-            execution = self._sfn.describe_execution(executionArn=execution_arn)
-            if input == json.loads(execution['input']):
-                log.info('A completed execution %r already exists', execution_arn)
-                raise GenerationFinished(token)
-            else:
-                raise InvalidGeneration(token)
-        else:
-            assert execution_arn == execution['executionArn'], (execution_arn, execution)
-            log.info('Started execution %r or it was already running', execution_arn)
-            return token
-
-    def inspect_generation(self, token: Token) -> Token | ExecutionResult:
-        execution_name = self.execution_name(token.generation_id, token.iteration)
-        execution_arn = self.execution_arn(execution_name)
-        try:
-            execution = self._sfn.describe_execution(executionArn=execution_arn)
-        except self._sfn.exceptions.ExecutionDoesNotExist:
-            raise NoSuchGeneration(token)
-        else:
-            output = execution.get('output')
-            status = execution['status']
-            if status == 'SUCCEEDED':
-                if output is None:
-                    log.info('Execution %r succeeded, no output yet', execution_arn)
-                    return token.next(retry_after=1)
-                else:
-                    log.info('Execution %r succeeded with output %r', execution_arn, output)
-                    return {k: json.loads(execution[k]) for k in ['input', 'output']}
-            elif status == 'RUNNING':
-                log.info('Execution %r is still running', execution_arn)
-                return token.next()
-            else:
-                raise GenerationFailed(status=status, output=output)
-
-    def arn(self, suffix):
-        return f'arn:aws:states:{aws.region_name}:{aws.account}:{suffix}'
-
-    @property
-    def machine_arn(self):
-        return self.arn(f'stateMachine:{self.machine_name}')
-
-    def execution_arn(self, execution_name):
-        return self.arn(f'execution:{self.machine_name}:{execution_name}')
-
-    def execution_name(self, generation_id: UUID, iteration: int) -> str:
-        assert isinstance(generation_id, UUID), generation_id
-        assert isinstance(iteration, int), iteration
-        execution_name = f'{generation_id}_{iteration}'
-        assert 0 < len(execution_name) <= 80, execution_name
-        return execution_name
-
-    @property
-    def _sfn(self):
-        return aws.stepfunctions
diff --git a/src/azul/service/avro_pfb.py b/src/azul/service/avro_pfb.py
deleted file mode 100644
index 604fc0f8da..0000000000
--- a/src/azul/service/avro_pfb.py
+++ /dev/null
@@ -1,805 +0,0 @@
-import bisect
-from collections import (
-    defaultdict,
-)
-from collections.abc import (
-    Iterable,
-)
-from itertools import (
-    chain,
-)
-import logging
-from operator import (
-    attrgetter,
-    itemgetter,
-)
-from typing import (
-    ClassVar,
-    Mapping,
-    MutableSet,
-    Self,
-)
-from uuid import (
-    UUID,
-    uuid5,
-)
-
-import attr
-import fastavro
-from fastavro.validation import (
-    ValidationError,
-)
-from more_itertools import (
-    one,
-)
-
-from azul import (
-    R,
-    config,
-)
-from azul.indexer.field import (
-    ClosedRange,
-    FieldTypes,
-    Nested,
-    null_bool,
-    null_datetime,
-    null_float,
-    null_int,
-    null_str,
-    pass_thru_int,
-    pass_thru_json,
-)
-from azul.json import (
-    copy_json,
-)
-from azul.plugins import (
-    RepositoryPlugin,
-)
-from azul.plugins.metadata.hca.indexer.transform import (
-    pass_thru_uuid4,
-    value_and_unit,
-)
-from azul.types import (
-    AnyJSON,
-    AnyMutableJSON,
-    JSON,
-    MutableJSON,
-    MutableJSONs,
-)
-
-log = logging.getLogger(__name__)
-
-renamed_fields = {
-    'related_files': None  # None to remove field
-}
-
-
-def write_pfb_entities(entities: Iterable[JSON], pfb_schema: JSON, path: str):
-    assert isinstance(pfb_schema, dict)
-    parsed_schema = fastavro.parse_schema(pfb_schema)
-    with open(path, 'w+b') as fh:
-        # Writing the entities one at a time is ~2.5x slower, but
makes it clear - # which entities fail, which is useful for debugging. - if config.debug > 1: - log.info('Writing PFB entities individually') - for entity in entities: - try: - fastavro.writer(fh, parsed_schema, [entity], validator=True) - except ValidationError: - log.error('Failed to write Avro entity: %r', entity) - raise - else: - fastavro.writer(fh, parsed_schema, entities, validator=True) - - -# FIXME: Unit tests do not cover PFB handover using an AnVIL catalog -# https://github.com/DataBiosphere/azul/issues/4606 -class PFBConverter: - """ - Converts documents from Elasticsearch into PFB entities. A document's inner - entities correspond to PFB entities which are normalized and linked via - Relations. - """ - - entity_type = 'files' - - def __init__(self, schema: JSON, repository_plugin: RepositoryPlugin): - self.schema = schema - self.repository_plugin = repository_plugin - self._entities: dict[PFBEntity, MutableSet[PFBRelation]] = defaultdict(set) - - def add_doc(self, doc: JSON): - """ - Add an Elasticsearch document to be transformed. - """ - doc_copy = copy_json(doc, 'contents', self.entity_type) - contents = doc_copy['contents'] - file_relations = set() - for entity_type, entities in contents.items(): - # copy_json is expected to only deep copy a subset of the document - if entity_type == self.entity_type: - assert entities is not doc['contents'][entity_type] - else: - assert entities is doc['contents'][entity_type] - entities = (e for e in entities if 'document_id' in e) - # Sorting entities is required for deterministic output since - # the order of the inner entities in an aggregate document is - # tied to the order with which contributions are returned by ES - # during aggregation, which happens to be non-deterministic. - for entity in sorted(entities, key=itemgetter('document_id')): - if entity_type != self.entity_type: - _inject_reference_handover_values(entity, doc) - pfb_entity = PFBEntity.for_aggregate(name=entity_type, - object_=entity, - schema=self.schema) - if pfb_entity not in self._entities: - self._entities[pfb_entity] = set() - file_relations.add(PFBRelation.to_entity(pfb_entity)) - file_entity: MutableJSON = one(contents[self.entity_type]) - related_files = file_entity.pop('related_files', []) - for entity in chain([file_entity], related_files): - _inject_reference_handover_values(entity, doc) - # File entities are assumed to be unique - pfb_entity = PFBEntity.for_aggregate(name=self.entity_type, - object_=entity, - schema=self.schema) - assert pfb_entity not in self._entities - # Terra streams PFBs and requires entities be defined before they are - # referenced. Thus we add the file entity after all the entities - # it relates to. - self._entities[pfb_entity] = file_relations - - def entities(self) -> Iterable[JSON]: - for entity, relations in self._entities.items(): - # Sort relations to make entities consistent for easy diffing - yield entity.to_json(sorted(relations, key=attrgetter('dst_name', 'dst_id'))) - - -def _reversible_join(joiner: str, parts: Iterable[str]) -> str: - parts = list(parts) - assert all(joiner not in part for part in parts), R('Found joiner in', parts) - return joiner.join(parts) - - -@attr.s(auto_attribs=True, frozen=True, kw_only=True) -class PFBEntity: - """ - Python representation of the PFB data object. Attribute names conform to the - PFB spec (which simplifies serialization). 
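-
-    For illustration (hypothetical values), `to_json` below serializes an
-    instance roughly as::
-
-        {
-            'id': 'donors.a1b2.1',
-            'name': 'donors',
-            'object': ('donors', {...}),
-            'relations': [{'dst_id': 'files.c3d4.1', 'dst_name': 'files'}]
-        }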
- """ - id: str - name: str - object: MutableJSON = attr.ib(eq=False) - namespace_uuid: ClassVar[UUID] = UUID('bc93372b-9218-4f0e-b64e-6f3b339687d6') - - def __attrs_post_init__(self): - assert len(self.id) <= 254, R('Terra requires IDs be no longer than 254 chars') - - @classmethod - def for_aggregate(cls, - name: str, - object_: MutableJSON, - schema: JSON - ) -> Self: - """ - Derive ID from object in a reproducible way so that we can distinguish - entities by comparing their IDs. - """ - cls._add_missing_fields(name, object_, schema) - ids = object_['document_id'] - # document_id is an array unless the inner entity type matches the - # outer entity type - ids = sorted(ids) if isinstance(ids, list) else [ids] - id_ = uuid5(cls.namespace_uuid, _reversible_join('_', ids)) - id_ = _reversible_join('.', map(str, (name, id_, len(ids)))) - return cls(id=id_, name=name, object=object_) - - @classmethod - def for_replica(cls, id: str, replica: MutableJSON) -> Self: - name, object_ = replica['replica_type'], replica['contents'] - return cls(id=id, name=name, object=object_) - - @classmethod - def _add_missing_fields(cls, name: str, object_: MutableJSON, schema): - """ - Compare entities against the schema and add any fields that are missing. - None is the default value. - """ - if schema['type'] == 'record': - object_schema = one(f for f in schema['fields'] if f['name'] == 'object') - entity_schema = one(e for e in object_schema['type'] if e['name'] == name) - elif isinstance(schema['type'], dict): - entity_schema = schema['type']['items'] - else: - assert False, schema - for field in entity_schema['fields']: - field_name, field_type = field['name'], field['type'] - if field_name not in object_: - if isinstance(field_type, list): - assert 'null' in field_type, field - default_value = None - elif isinstance(field_type, dict) and field_type['type'] == 'array': - if isinstance(field_type['items'], dict): - assert field_type['items']['type'] in ('record', 'array'), field - default_value = [] - else: - assert 'null' in field_type['items'], field - default_value = [None] - elif field_type == 'null': - default_value = None - else: - assert False, field - object_[field_name] = default_value - if ( - isinstance(field_type, dict) - and field_type['type'] == 'array' - and isinstance(field_type['items'], dict) - and field_type['items']['type'] == 'record' - ): - for sub_object in object_[field_name]: - cls._add_missing_fields(name=field_name, - object_=sub_object, - schema=field) - - def to_json(self, relations: Iterable['PFBRelation']) -> JSON: - return { - 'id': self.id, - 'name': self.name, - # https://fastavro.readthedocs.io/en/latest/writer.html#using-the-tuple-notation-to-specify-which-branch-of-a-union-to-take - 'object': (self.name, self.object), - 'relations': [attr.asdict(relation) for relation in relations] - } - - -@attr.s(auto_attribs=True, frozen=True, kw_only=True) -class PFBRelation: - dst_id: str - # A more appropriate attribute name would be dst_type, but we stick with - # 'dst_name' to conform to PFB spec - dst_name: str - - @classmethod - def to_entity(cls, entity: PFBEntity) -> Self: - return cls(dst_id=entity.id, dst_name=entity.name) - - -def pfb_links_from_field_types(field_types: FieldTypes) -> MutableJSON: - return { - entity_type: [] if entity_type == 'files' else [{ - 'multiplicity': 'MANY_TO_MANY', - 'dst': 'files', - 'name': 'files' - }] - for entity_type in field_types - } - - -def pfb_metadata_entity(links_by_entity_type: Mapping[str, MutableJSONs], - ) -> MutableJSON: - """ - 
The Metadata entity encodes the possible relationships between tables. - - Unfortunately Terra does not display the relations between the nodes. - """ - return { - 'id': None, - 'name': 'Metadata', - 'object': { - 'nodes': [ - { - 'name': entity_type, - 'ontology_reference': '', - 'values': {}, - 'links': links, - 'properties': [] - } - for entity_type, links in links_by_entity_type.items() - ], - 'misc': {} - } - } - - -def pfb_schema_from_field_types(field_types: FieldTypes) -> JSON: - field_types = _inject_reference_handover_columns(field_types) - entity_schemas = ( - { - 'name': entity_type, - 'type': 'record', - 'fields': list(_entity_schema_recursive(field_type, entity_type)) - } - for entity_type, field_type in field_types.items() - # We skip primitive top-level fields like total_estimated_cells - if isinstance(field_type, dict) - ) - return avro_pfb_schema(entity_schemas) - - -def pfb_schema_from_replicas(replicas: Iterable[JSON]) -> list[JSON]: - schemas_by_replica_type: dict[str, MutableJSON] = {} - for replica in replicas: - replica_type, replica_contents = replica['replica_type'], replica['contents'] - _update_replica_schema(schema=schemas_by_replica_type, - path=(replica_type,), - key=replica_type, - value=replica_contents) - return list(schemas_by_replica_type.values()) - - -def avro_pfb_schema(azul_avro_schema: Iterable[JSON]) -> JSON: - """ - The boilerplate Avro schema that comprises a PFB's schema is returned in - this JSON literal below. This schema was copied from - - https://github.com/uc-cdis/pypfb/blob/1497bf50e5c85201f6bad9ca69616138b17b8c77/src/pfb/writer.py#L85 - - :param azul_avro_schema: The parts of the schema describe the custom tables - we insert into the PFB - - :return: The complete and valid Avro schema - """ - return { - 'type': 'record', - 'name': 'Entity', - 'fields': [ - { - 'name': 'id', - 'type': ['null', 'string'], - 'default': None - }, - { - 'name': 'name', - 'type': 'string' - }, - { - 'name': 'object', - 'type': [ - { - 'type': 'record', - 'name': 'Metadata', - 'fields': [ - { - 'name': 'nodes', - 'type': { - 'type': 'array', - 'items': { - 'type': 'record', - 'name': 'Node', - 'fields': [ - { - 'name': 'name', - 'type': 'string' - }, - { - 'name': 'ontology_reference', - 'type': 'string', - }, - { - 'name': 'values', - 'type': { - 'type': 'map', - 'values': 'string', - }, - }, - { - 'name': 'links', - 'type': { - 'type': 'array', - 'items': { - 'type': 'record', - 'name': 'Link', - 'fields': [ - { - 'name': 'multiplicity', - 'type': { - 'type': 'enum', - 'name': 'Multiplicity', - 'symbols': [ - 'ONE_TO_ONE', - 'ONE_TO_MANY', - 'MANY_TO_ONE', - 'MANY_TO_MANY', - ], - }, - }, - { - 'name': 'dst', - 'type': 'string', - }, - { - 'name': 'name', - 'type': 'string', - }, - ], - }, - }, - }, - { - 'name': 'properties', - 'type': { - 'type': 'array', - 'items': { - 'type': 'record', - 'name': 'Property', - 'fields': [ - { - 'name': 'name', - 'type': 'string', - }, - { - 'name': 'ontology_reference', - 'type': 'string', - }, - { - 'name': 'values', - 'type': { - 'type': 'map', - 'values': 'string', - }, - }, - ], - }, - }, - }, - ], - }, - }, - }, - { - 'name': 'misc', - 'type': { - 'type': 'map', - 'values': 'string' - }, - }, - ], - }, - *azul_avro_schema - ] - }, - { - 'name': 'relations', - 'type': { - 'type': 'array', - 'items': { - 'type': 'record', - 'name': 'Relation', - 'fields': [ - { - 'name': 'dst_id', - 'type': 'string' - }, - { - 'name': 'dst_name', - 'type': 'string' - }, - ], - }, - }, - 'default': [], - }, - ], - } - - -def 
_inject_reference_handover_columns(field_types: FieldTypes) -> FieldTypes: - return { - entity_type: ( - dict(fields, datarepo_row_id=null_str, source_datarepo_snapshot_id=null_str) - if isinstance(fields, dict) and 'source_datarepo_row_ids' in fields - else fields - ) - for entity_type, fields in field_types.items() - } - - -def _inject_reference_handover_values(entity: MutableJSON, doc: JSON): - if 'source_datarepo_row_ids' in entity: - entity['datarepo_row_id'] = entity['document_id'] - entity['source_datarepo_snapshot_id'] = one(doc['sources'])['id'] - - -# FIXME: It's not obvious as to why these are union types. Explain or change. -# https://github.com/DataBiosphere/azul/issues/4094 - -# FIXME: It seems that these are just all primitive types, it just so happens -# that all of the primitive field types types are nullable -# https://github.com/DataBiosphere/azul/issues/4094 - -_json_to_pfb_types = { - bool: 'boolean', - float: 'double', - int: 'long', - str: 'string' -} - -_nullable_to_pfb_types = { - null_bool: ['null', 'boolean'], - null_float: ['null', 'double'], - null_int: ['null', 'long'], - null_str: ['null', 'string'], - null_datetime: ['null', 'string'], -} - - -def _entity_schema_recursive(field_types: FieldTypes, - *path: str - ) -> Iterable[JSON]: - for field_name, field_type in field_types.items(): - plural = isinstance(field_type, list) - if plural: - field_type = one(field_type) - try: - new_field_name = renamed_fields[field_name] - except KeyError: - pass - else: - if new_field_name is None: - break # to not include this field in the schema - else: - field_name = new_field_name - - if isinstance(field_type, Nested): - field_type = field_type.properties - - name_fields = {'name': field_name} - if path: - namespace = '.'.join(path) - qualified_name = namespace + '.' + field_name - name_fields['namespace'] = namespace - else: - qualified_name = field_name - - if isinstance(field_type, dict): - yield { - **name_fields, - 'type': { - # This is always an array, even if singleton is passed in - 'type': 'array', - 'items': { - 'name': qualified_name, - 'type': 'record', - 'fields': list(_entity_schema_recursive(field_type, *path, field_name)) - } - } - } - elif field_type in _nullable_to_pfb_types: - # Exceptions are fields that do not become lists during aggregation - field_exceptions = ( - 'donor_count', - 'estimated_cell_count', - 'total_estimated_cells', - 'total_estimated_cells_redundant', - 'source_datarepo_snapshot_id', - ) - path_exceptions = ( - ('projects', 'accessions'), - ('projects', 'tissue_atlas') - ) - # FIXME: The first term is not self-explanatory - # https://github.com/DataBiosphere/azul/issues/4094 - if ( - path[0] == 'files' and not plural - or field_name in field_exceptions - or path in path_exceptions - ): - yield { - **name_fields, - 'type': _nullable_to_pfb_types[field_type], - } - else: - yield { - **name_fields, - 'type': { - 'type': 'array', - 'items': _nullable_to_pfb_types[field_type], - } - } - elif field_type is pass_thru_uuid4: - yield { - **name_fields, - 'type': ['string'], - 'logicalType': 'UUID' - } - elif isinstance(field_type, ClosedRange): - yield { - **name_fields, - 'type': { - 'type': 'array', - 'items': { - 'type': 'array', - 'items': _json_to_pfb_types[one(field_type.ends_type.native_types)] - } - } - } - # FIXME: Nested is handled so much more elegantly. See if we can have - # ValueAndUnit inherit Nested. 
- # https://github.com/DataBiosphere/azul/issues/4094 - elif field_type is value_and_unit: - yield { - **name_fields, - 'type': { - 'type': 'array', - 'items': [ - 'null', - { - # FIXME: Why do we need to repeat `name` and `namespace` - # with the same values at two different depths? - # https://github.com/DataBiosphere/azul/issues/4094 - 'name': qualified_name, - 'type': 'record', - 'fields': [ - { - 'name': name, - 'namespace': qualified_name, - # Although, not technically a null_str, it's effectively the same - 'type': _nullable_to_pfb_types[null_str] - } - for name in ('value', 'unit') - ] - } - ] - } - } - elif field_type in (pass_thru_json, pass_thru_int): - # Pass thru types are used only for aggregation and are excluded - # from actual hits - pass - else: - assert False, field_type - - -def _sort_pfb_union(schema: str | dict) -> str: - if isinstance(schema, str): - return schema - else: - return schema['type'] - - -class SchemaUpdateException(Exception): - pass - - -def _update_replica_schema(*, - schema: MutableJSON, - path: tuple[str, ...], - key: str, - value: AnyMutableJSON): - """ - Update in place a (part of an) existing PFB schema to ensure that it - accommodates a given (part of a) JSON document. The schema will only ever - expand, so after updating it will describe a superset of the documents that - it described pre-update. Starting from an empty schema, repeatedly calling - this function allows us to discover a general schema for a series of - documents of unknown shape. - - :param schema: a part of a PFB schema. It may be empty. - - :param path: the series of field names that locate `schema` within its - top-level parent schema. The first entry should be the name of - the underlying PFB entity's record type. - - :param key: the key within `schema` whose associated value will be updated - to describe `value`. This is the only part of `schema` that may - be mutated. - - :param value: a part of a PFB entity. 
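-
-    For example (an illustrative sketch): if a record field has so far only
-    been seen with integer values and is therefore typed as 'long' in the
-    current schema, encountering a document in which that field is null
-    causes `_update_replica_schema_union` to widen the field's type to the
-    union ['long', 'null'].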
-    """
-    try:
-        old_type = schema[key]
-    except KeyError:
-        schema[key] = _new_replica_schema(path=path, value=value)
-    else:
-        if isinstance(old_type, list):
-            _update_replica_schema_union(schema=schema, path=path, key=key, value=value)
-        else:
-            if value is None and old_type == 'null':
-                pass
-            elif (isinstance(value, list)
-                  and isinstance(old_type, dict) and old_type['type'] == 'array'):
-                for v in value:
-                    _update_replica_schema_union(schema=old_type,
-                                                 path=path,
-                                                 key='items',
-                                                 value=v)
-            elif (isinstance(value, dict)
-                  and isinstance(old_type, dict) and old_type['type'] == 'record'):
-                old_fields = {field['name']: field for field in old_type['fields']}
-                for k in value.keys() | old_fields.keys():
-                    try:
-                        field = old_fields[k]
-                    except KeyError:
-                        field = {
-                            'name': k,
-                            'namespace': '.'.join(path),
-                            'type': 'null'
-                        }
-                        bisect.insort(old_type['fields'], field, key=itemgetter('name'))
-                        new_value = value[k]
-                    else:
-                        new_value = value.get(k)
-                    _update_replica_schema_union(schema=field,
-                                                 path=(*path, k),
-                                                 key='type',
-                                                 value=new_value)
-            else:
-                try:
-                    new_type = _json_to_pfb_types[type(value)]
-                except KeyError:
-                    raise SchemaUpdateException
-                else:
-                    if new_type != old_type:
-                        raise SchemaUpdateException
-
-
-def _update_replica_schema_union(*,
-                                 schema: MutableJSON,
-                                 path: tuple[str, ...],
-                                 key: str,
-                                 value: AnyMutableJSON):
-    old_type = schema[key]
-    if not isinstance(old_type, list):
-        old_type = [old_type]
-    for union_member in old_type:
-        try:
-            _update_replica_schema(schema={key: union_member},
-                                   path=path,
-                                   key=key,
-                                   value=value)
-        except SchemaUpdateException:
-            continue
-        else:
-            break
-    else:
-        new_type = _new_replica_schema(path=path, value=value)
-        if old_type:
-            bisect.insort(old_type, new_type, key=_sort_pfb_union)
-        else:
-            old_type = new_type
-        schema[key] = old_type
-
-
-def _new_replica_schema(*,
-                        path: tuple[str, ...],
-                        value: AnyJSON,
-                        ) -> AnyMutableJSON:
-    """
-    Create a part of a PFB schema to describe a part of a PFB entity represented
-    as a JSON document.
-
-    :param path: the location of `value` within the root document as a series
-                 of keys. The first key should be the name of the underlying PFB
-                 entity's type within the schema.
-
-    :param value: a part of a PFB entity.
-
-    :return: JSON describing the contents of `value` as a part of PFB schema.
-    """
-    if value is None:
-        result = 'null'
-    elif isinstance(value, list):
-        # An empty list indicates "no type" (an empty union). This will be
-        # replaced with an actual type unless we never encounter a non-empty
-        # array.
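-        # For example (illustrative): an input of [] yields
-        # {'type': 'array', 'items': []}, while [1] yields
-        # {'type': 'array', 'items': 'long'}.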
-        result = {'type': 'array', 'items': []}
-        for v in value:
-            _update_replica_schema(schema=result,
-                                   path=path,
-                                   key='items',
-                                   value=v)
-    elif isinstance(value, dict):
-        name = '.'.join(path)
-        result = {
-            'name': name,
-            'type': 'record',
-            'fields': [
-                {
-                    'name': k,
-                    'namespace': name,
-                    'type': _new_replica_schema(path=(*path, k), value=v)
-                }
-                for k, v in sorted(value.items())
-            ]
-        }
-    else:
-        result = _json_to_pfb_types[type(value)]
-    return result
diff --git a/src/azul/service/buffer.py b/src/azul/service/buffer.py
deleted file mode 100644
index cf6fbc2f23..0000000000
--- a/src/azul/service/buffer.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from io import (
-    BytesIO,
-)
-from logging import (
-    getLogger,
-)
-from typing import (
-    Callable,
-)
-
-log = getLogger(__name__)
-
-
-class FlushableBuffer(BytesIO):
-    """
-    A buffer that flushes its contents to a callback function (``callback``),
-    either when enough data has accumulated (at least ``chunk_size`` bytes)
-    or when the buffer is closed.
-
-    The callback is invoked zero or more times with an argument that is
-    exactly ``chunk_size`` bytes long, followed by at most one invocation
-    with an argument that is between 1 and ``chunk_size`` bytes long.
-
-    :param chunk_size: The exact size of each flushed chunk, except possibly
-                       the last one
-    :param callback: The callback function to receive flushed output
-    """
-
-    def __init__(self, chunk_size: int, callback: Callable):
-        super(FlushableBuffer, self).__init__()
-        self.__chunk_size = chunk_size
-        self.__callback = callback
-        self.__remaining_size = 0
-
-    def write(self, b: bytes):
-        super().write(b)
-        self.__remaining_size += len(b)
-
-        if self.__remaining_size >= self.__chunk_size:
-            offset = 0
-            while self.__remaining_size >= self.__chunk_size:
-                self.seek(offset)
-                self.__callback(self.read(self.__chunk_size))
-                offset += self.__chunk_size
-                self.__remaining_size -= self.__chunk_size
-
-            # Get the remainder before resetting the pointer.
-            self.seek(offset)
-            remainder = self.read()
-
-            # Reset the buffer to the empty state.
-            self.seek(0)
-            self.truncate(0)
-            self.__remaining_size = 0
-
-            # Write the remainder back to the buffer.
-            self.write(remainder)
-
-    def close(self):
-        if self.__remaining_size > 0:
-            self.__callback(self.getvalue())
-            self.__remaining_size = 0
-        # As the buffer is closed, the buffer doesn't need to be reset.
-        super().close()
-
-    @property
-    def remaining_size(self):
-        return self.__remaining_size
diff --git a/src/azul/service/catalog_controller.py b/src/azul/service/catalog_controller.py
deleted file mode 100644
index 6b985c5401..0000000000
--- a/src/azul/service/catalog_controller.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import attr
-
-from azul import (
-    CatalogName,
-    cache,
-    config,
-)
-from azul.openapi import (
-    schema,
-)
-from azul.plugins import (
-    MetadataPlugin,
-    Plugin,
-    RepositoryPlugin,
-)
-from azul.service.app_controller import (
-    ServiceAppController,
-)
-from azul.types import (
-    JSON,
-)
-
-
-class CatalogController(ServiceAppController):
-
-    # The custom return type annotation is an experiment. Please don't adopt
-    # this just yet elsewhere in the program.
- - def list_catalogs(self) -> schema.object( - default_catalog=str, - catalogs=schema.object( - additionalProperties=schema.object( - atlas=str, - internal=bool, - plugins=schema.object( - additionalProperties=schema.object( - name=str, - sources=schema.optional(schema.array(str)), - indices=schema.optional(schema.object( - additionalProperties=schema.object( - default_sort=str, - default_order=str - ) - )), - ), - ) - ) - ) - ): - return { - 'default_catalog': config.default_catalog, - 'catalogs': { - catalog.name: { - 'internal': catalog.internal, - 'atlas': catalog.atlas, - 'plugins': { - plugin_type: { - **attr.asdict(plugin), - **self._plugin_config(plugin_type, catalog.name) - } - for plugin_type, plugin in catalog.plugins.items() - } - } - for catalog in config.catalogs.values() - } - } - - @cache - def _plugin_config(self, plugin_base_cls: str, catalog: CatalogName) -> JSON: - plugin_base_cls = Plugin.type_for_name(plugin_base_cls) - plugin_cls = plugin_base_cls.load(catalog) - if issubclass(plugin_base_cls, RepositoryPlugin): - plugin = plugin_cls.create(catalog) - return { - 'sources': list(map(str, plugin.sources)) - } - elif issubclass(plugin_base_cls, MetadataPlugin): - plugin = plugin_cls.create() - return { - 'indices': { - entity_type: { - 'default_sort': sorting.field_name, - 'default_order': sorting.order - } - for entity_type, sorting in plugin.exposed_indices.items() - } - } - else: - assert False, plugin_base_cls diff --git a/src/azul/service/drs_controller.py b/src/azul/service/drs_controller.py deleted file mode 100644 index 3a3955cb81..0000000000 --- a/src/azul/service/drs_controller.py +++ /dev/null @@ -1,427 +0,0 @@ -from ast import ( - literal_eval, -) -import base64 -from collections.abc import ( - Mapping, -) -from dataclasses import ( - dataclass, - field, -) -from datetime import ( - datetime, -) -import time -import urllib.parse - -from chalice import ( - ChaliceViewError, - Response, -) -from deprecated import ( - deprecated, -) -from furl import ( - furl, -) -from more_itertools import ( - one, -) -import requests - -from azul import ( - CatalogName, - R, - cached_property, - config, - dss, - mutable_furl, -) -from azul.collections import ( - adict, -) -from azul.drs import ( - AccessMethod, - dos_object_url_path, - drs_object_uri, - drs_object_url_path, -) -from azul.openapi import ( - responses, - schema, -) -from azul.plugins import ( - File, -) -from azul.service.repository_service import ( - RepositoryService, -) -from azul.service.source_controller import ( - SourceController, -) -from azul.types import ( - JSON, - MutableJSON, -) - - -class DRSController(SourceController): - - @cached_property - def service(self) -> RepositoryService: - return RepositoryService() - - def _access_url(self, url): - return {'url': url} - - @classmethod - def get_object_response_schema(cls): - return responses.json_content( - schema.object( - created_time=str, - id=str, - self_uri=str, - size=str, - version=str, - checksums=schema.object(sha1=str, **{'sha-256': str}), - access_methods=schema.array(schema.object( - access_url=schema.optional(schema.object(url=str)), - type=schema.optional(str), - access_id=schema.optional(str) - )) - ) - ) - - def get_object(self, file_uuid, query_params): - drs_object = DRSObject(file_uuid, version=query_params.get('version')) - for access_method in AccessMethod: - # We only want direct URLs for Google - extra_params = dict(query_params, directurl=access_method.replica == 'gcp') - response = self.dss_get_file(file_uuid, 
access_method.replica, **extra_params) - if response.status_code == 301: - retry_url = response.headers['location'] - query = urllib.parse.urlparse(retry_url).query - query = urllib.parse.parse_qs(query, strict_parsing=True) - token = one(query['token']) - # We use the encoded token string as the key for our access ID. - access_id = encode_access_id(token, access_method.replica) - drs_object.add_access_method(access_method, access_id=access_id) - elif response.status_code == 302: - retry_url = response.headers['location'] - if access_method.replica == 'gcp': - assert retry_url.startswith('gs:') - drs_object.add_access_method(access_method, url=retry_url) - else: - # For errors, just proxy DSS response - return Response(response.text, status_code=response.status_code) - return Response(drs_object.to_json()) - - def get_object_access(self, access_id, file_uuid, query_params): - try: - token, replica = decode_access_id(access_id) - except ValueError: - return Response('Invalid DRS access ID', status_code=400) - else: - # Using the same token as before is OK. The DSS only starts a new - # checkout if the token is absent. Otherwise the token undergoes - # minimal validation and receives an update to the `attempts` key - # (which is not used for anything besides perhaps diagnostics). - response = self.dss_get_file(file_uuid, replica, **{ - **query_params, - 'directurl': replica == 'gcp', - 'token': token - }) - if response.status_code == 301: - headers = {'retry-after': response.headers['retry-after']} - # DRS says no body for 202 responses - return Response(body='', status_code=202, headers=headers) - elif response.status_code == 302: - retry_url = response.headers['location'] - return Response(self._access_url(retry_url)) - else: - # For errors, just proxy DSS response - return Response(response.text, status_code=response.status_code) - - def dss_get_file(self, file_uuid, replica, **kwargs): - dss_params = { - 'replica': replica, - **kwargs - } - url = self.dss_file_url(file_uuid) - return requests.get(str(url), params=dss_params, allow_redirects=False) - - @classmethod - def dss_file_url(cls, file_uuid: str) -> mutable_furl: - return furl(config.dss_endpoint).add(path=('files', file_uuid)) - - @deprecated('DOS support will be removed') - def dos_get_object(self, catalog, file_uuid, file_version, authentication): - file = self.service.get_data_file(catalog=catalog, - file_uuid=file_uuid, - file_version=file_version, - filters=self.get_filters(catalog, authentication, None)) - if file is not None: - data_obj = self.file_to_drs(catalog, file) - assert data_obj['id'] == file_uuid - assert file_version is None or data_obj['version'] == file_version - return Response({'data_object': data_obj}, status_code=200) - else: - return Response({'msg': 'Data object not found.'}, status_code=404) - - @deprecated('DOS support will be removed') - def _dos_gs_url(self, file_uuid, version) -> mutable_furl: - url = self.dss_file_url(file_uuid) - params = dict({'file_version': version} if version else {}, - directurl=True, - replica='gcp') - while True: - if self.lambda_context.get_remaining_time_in_millis() / 1000 > 3: - dss_response = requests.get(url, params=params, allow_redirects=False) - if dss_response.status_code == 302: - url = furl(dss_response.next.url) - assert url.scheme == 'gs', R('Expected a gs:// URL', url) - return url - elif dss_response.status_code == 301: - url = dss_response.next.url - remaining_lambda_seconds = self.lambda_context.get_remaining_time_in_millis() / 1000 - server_side_sleep = 
min(1, - max(remaining_lambda_seconds - config.api_gateway_timeout_padding - 3, 0)) - time.sleep(server_side_sleep) - else: - raise ChaliceViewError({ - 'msg': f'Received {dss_response.status_code} from DSS. Could not get file' - }) - else: - raise GatewayTimeoutError({ - 'msg': f"DSS timed out getting file: '{file_uuid}', version: '{version}'." - }) - - @deprecated('DOS support will be removed') - def file_to_drs(self, catalog: CatalogName, file: File): - """ - Converts an aggregate file document to a DRS data object response. - """ - urls = [ - self.file_url_func(catalog=catalog, - file_uuid=file.uuid, - version=file.version, - fetch=False, - wait='1', - fileName=file.name), - self._dos_gs_url(file.uuid, file.version) - ] - - return { - 'id': file.uuid, - 'urls': [ - { - 'url': str(url) - } - for url in urls - ], - 'size': str(file.size), - 'checksums': [ - { - 'checksum': file.sha256, - 'type': 'sha256' - } - ], - 'aliases': [file.name], - 'version': file.version, - 'name': file.name - } - - -class GatewayTimeoutError(ChaliceViewError): - STATUS_CODE = 504 - - -@dataclass -class DRSObject: - """" - Used to build up a https://ga4gh.github.io/data-repository-service-schemas/docs/#_drsobject - """ - uuid: str - version: str | None = None - access_methods: list[MutableJSON] = field(default_factory=list) - - def add_access_method(self, - access_method: AccessMethod, *, - url: str | None = None, - access_id: str | None = None): - """ - We only currently use `url_type`s of 'https' and 'gs'. Only one of `url` - and `access_id` should be specified. - """ - assert url is None or access_id is None - self.access_methods.append({ - 'type': access_method.scheme, - **({} if access_id is None else {'access_id': access_id}), - **({} if url is None else {'access_url': {'url': url}}), - }) - - def to_json(self) -> JSON: - args = adict(replica='aws', version=self.version) - url = DRSController.dss_file_url(self.uuid).add(args=args) - headers = requests.head(url).headers - version = headers['x-dss-version'] - if self.version is not None: - assert version == self.version - uri = dss_drs_object_uri(file_uuid=self.uuid, file_version=version) - return { - **{ - 'checksums': [ - {'sha1': headers['x-dss-sha1']}, - {'sha-256': headers['x-dss-sha256']} - ], - 'created_time': timestamp(version), - 'id': self.uuid, - 'self_uri': str(uri), - 'size': headers['x-dss-size'], - 'version': version - }, - 'access_methods': self.access_methods - } - - -def timestamp(version): - """ - Convert a DSS version into a proper, RFC3339 compliant timestamp. - - >>> timestamp('2019-08-01T211621.345939Z') - '2019-08-01T21:16:21.345939Z' - - >>> timestamp('2019-08-01T211621:345939Z') - Traceback (most recent call last): - ... - ValueError: time data '2019-08-01T211621:345939Z' does not match format '%Y-%m-%dT%H%M%S.%fZ' - """ - return datetime.strptime(version, dss.version_format).isoformat() + 'Z' - - -def encode_access_id(token_str: str, replica: str) -> str: - """ - Encode a given token as an access ID using URL-safe base64 without padding. - - Standard base64 pads the result with equal signs (`=`). Those would need to - be URL-encoded when used in the query portion of a URL: - - >>> base64.urlsafe_b64encode(b"('back on boogie street', 'aws')") - b'KCdiYWNrIG9uIGJvb2dpZSBzdHJlZXQnLCAnYXdzJyk=' - - This function strips that padding. The padding is redundant as long as the - length of the encoded string is known at the time of decoding. With URL - query parameters this is always the case. 
- - >>> encode_access_id('back on boogie street', 'aws') - 'KCdiYWNrIG9uIGJvb2dpZSBzdHJlZXQnLCAnYXdzJyk' - - >>> decode_access_id(encode_access_id('back on boogie street', 'aws')) - ('back on boogie street', 'aws') - - >>> bad_access_id = 'KHsnbm90IGEnOiAnc3RyaW5nJ30sICdhd3MnKQ' - >>> base64.urlsafe_b64decode(bad_access_id + '==') - b"({'not a': 'string'}, 'aws')" - - >>> decode_access_id(bad_access_id) - Traceback (most recent call last): - ... - ValueError: Malformed access ID - """ - access_id = repr((token_str, replica)).encode() - access_id = base64.urlsafe_b64encode(access_id) - return access_id.rstrip(b'=').decode() - - -def decode_access_id(access_id: str) -> tuple[str, str]: - token = access_id.encode('ascii') # Base64 is a subset of ASCII - padding = b'=' * (-len(token) % 4) - token = base64.urlsafe_b64decode(token + padding) - token, replica = literal_eval(token.decode()) - if not isinstance(token, str) or not isinstance(replica, str): - raise ValueError('Malformed access ID') - return token, replica - - -def dss_drs_object_uri(*, - file_uuid: str, - file_version: str | None = None, - base_url: furl | None = None - ) -> mutable_furl: - """ - The drs:// URL for a given DSS file UUID and version. The return value will - point at the bare-bones DRS data object endpoint in the web service. - - :param file_uuid: the DSS file UUID of the file - - :param file_version: the DSS file version of the file - - :param base_url: an optional service endpoint, e.g. for local test servers. - If absent, the service endpoint for the current deployment - will be used. - """ - return drs_object_uri(base_url=_base_url(base_url), - path=(file_uuid,), - params=_url_query(file_version)) - - -def dss_dos_object_url(*, - catalog: CatalogName, - file_uuid: str, - file_version: str | None = None, - base_url: furl | None = None - ) -> mutable_furl: - """ - The http:// or https:// URL for a given DSS file UUID and version. The - return value will point at the bare-bones DOS data object endpoint in the - web service. - - :param catalog: the name of the catalog to retrieve the file from - - :param file_uuid: the DSS file UUID of the file - - :param file_version: the DSS file version of the file - - :param base_url: an optional service endpoint, e.g. for local test servers. - If absent, the service endpoint for the current deployment - will be used. - """ - return furl(url=_base_url(base_url), - path=dos_object_url_path(file_uuid), - query_params=dict(_url_query(file_version), catalog=catalog)) - - -def dss_drs_object_url(*, - file_uuid: str, - file_version: str | None = None, - base_url: furl | None = None, - access_id: str | None = None - ) -> mutable_furl: - """ - The http:// or https:// URL for a given DSS file UUID and version. The - return value will point at the bare-bones DRS data object endpoint in the - web service. - - :param file_uuid: the DSS file UUID of the file - - :param file_version: the optional DSS file version of the file - - :param base_url: an optional service endpoint, e.g. for local test servers. - If absent, the service endpoint for the current deployment - will be used. 
- - :param access_id: access id will be included in the URL if this parameter is - supplied - """ - return furl(url=_base_url(base_url), - path=drs_object_url_path(object_id=file_uuid, access_id=access_id), - args=_url_query(file_version)) - - -def _base_url(base_url: furl | None) -> furl: - return config.drs_endpoint if base_url is None else base_url - - -def _url_query(file_version: str | None) -> Mapping[str, str]: - return {'version': file_version} if file_version else {} diff --git a/src/azul/service/elasticsearch_service.py b/src/azul/service/elasticsearch_service.py deleted file mode 100644 index 62a286127f..0000000000 --- a/src/azul/service/elasticsearch_service.py +++ /dev/null @@ -1,712 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections import ( - defaultdict, -) -from collections.abc import ( - Iterable, - Mapping, - Sequence, -) -import json -import logging -from typing import ( - Any, - Generic, - Self, - TypeVar, - TypedDict, -) - -import attr -from more_itertools import ( - one, -) -from opensearchpy import ( - A, - OpenSearch, - Q, - Search, -) -from opensearchpy.helpers.aggs import ( - Agg, - Terms, -) -from opensearchpy.helpers.query import ( - Query, -) -from opensearchpy.helpers.response import ( - Response, -) - -from azul import ( - CatalogName, - R, - cached_property, - config, -) -from azul.es import ( - ESClientFactory, -) -from azul.indexer.document import ( - DocumentType, - IndexName, -) -from azul.indexer.document_service import ( - DocumentService, -) -from azul.indexer.field import ( - Nested, -) -from azul.plugins import ( - DocumentSlice, - FieldPath, - MetadataPlugin, - dotted, -) -from azul.service import ( - Filters, - FiltersJSON, -) -from azul.types import ( - JSON, - JSONs, - MutableJSON, - PrimitiveJSON, -) - -log = logging.getLogger(__name__) - - -class IndexNotFoundError(Exception): - - def __init__(self, missing_index: str): - super().__init__(f'Index `{missing_index}` was not found') - - -R1 = TypeVar('R1') -R2 = TypeVar('R2') - - -class ElasticsearchStage(Generic[R1, R2], metaclass=ABCMeta): - """ - A stage in a chain of responsibility to prepare an Elasticsearch request and - to process the response to that request. If an implementation modifies the - argument in place, it must return the argument. - """ - - @abstractmethod - def prepare_request(self, request: Search) -> Search: - """ - Modify the given request and return the argument or convert the given - request and return the result of the conversion. - """ - raise NotImplementedError - - @abstractmethod - def process_response(self, response: R1) -> R2: - """ - Handle the given response and return the result of the processing. - If an implementation modifies the argument in place it must return the - argument. - """ - raise NotImplementedError - - -R0 = TypeVar('R0') - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True) -class ElasticsearchChain(ElasticsearchStage[R0, R2]): - """ - The result of wrapping a stage or chain in another stage. 
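-
-    For example (an illustrative sketch with two hypothetical stages `a` and
-    `b`)::
-
-        chain = ElasticsearchChain(inner=a, outer=b)
-        request = chain.prepare_request(request)     # `a` prepares first, then `b`
-        response = chain.process_response(response)  # `a` processes first, then `b`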
- """ - - inner: ElasticsearchStage[R0, R1] - outer: ElasticsearchStage[R1, R2] - - def __attrs_post_init__(self): - assert not isinstance(self.outer, ElasticsearchChain), R( - 'Outer stage must not be a chain', type(self.outer)) - - def prepare_request(self, request: Search) -> Search: - request = self.inner.prepare_request(request) - request = self.outer.prepare_request(request) - return request - - def process_response(self, response0: R0) -> R2: - response1: R1 = self.inner.process_response(response0) - response2: R2 = self.outer.process_response(response1) - return response2 - - def stages(self) -> Iterable[ElasticsearchStage]: - yield self.outer - if isinstance(self.inner, ElasticsearchChain): - yield from self.inner.stages() - else: - yield self.inner - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True) -class _ElasticsearchStage(ElasticsearchStage[R1, R2], metaclass=ABCMeta): - """ - A base implementation of a stage. - """ - service: DocumentService - catalog: CatalogName - entity_type: str - - @cached_property - def plugin(self) -> MetadataPlugin: - return self.service.metadata_plugin(self.catalog) - - def wrap(self, other: ElasticsearchStage[R0, R1]) -> ElasticsearchChain[R0, R2]: - return ElasticsearchChain(inner=other, outer=self) - - -TranslatedFilters = Mapping[FieldPath, Mapping[str, Sequence[PrimitiveJSON]]] - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True) -class FilterStage(_ElasticsearchStage[Response, Response]): - """ - Converts the given filters to an Elasticsearch query and adds that query as - either a `query` or `post_filter` property to the request. - """ - filters: Filters - post_filter: bool - - def prepare_request(self, request: Search) -> Search: - query = self.prepare_query() - if self.post_filter: - request = request.post_filter(query) - else: - request = request.query(query) - return request - - def process_response(self, response: Response) -> Response: - return response - - @cached_property - def prepared_filters(self) -> TranslatedFilters: - limit_access = self.service.always_limit_access or self._limit_access - filters_json = self.filters.reify(self.plugin, limit_access=limit_access) - return self._translate_filters(filters_json) - - @property - @abstractmethod - def _limit_access(self) -> bool: - """ - Whether to enforce the managed access controls during filter - reification, provided that the service allows such conditional - enforcement of access. If it doesn't, the return value should be - ignored, and access must be enforced unconditionally. - """ - raise NotImplementedError - - def _translate_filters(self, filters: FiltersJSON) -> TranslatedFilters: - """ - Translate the field values in the given filter JSON to their respective - Elasticsearch form, using the field types, the field names to field - paths. - """ - catalog = self.catalog - field_mapping = self.plugin.field_mapping - translated_filters = {} - for field, filter in filters.items(): - field = field_mapping[field] - relation, values = one(filter.items()) - field_type = self.service.field_type(catalog, field) - values = field_type.filter(relation, values) - translated_filters[field] = {relation: list(values)} - return translated_filters - - def prepare_query(self, skip_field_paths: tuple[FieldPath] = ()) -> Query: - """ - Converts the given filters into an Elasticsearch DSL Query object. 
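-
-        For example (an illustrative sketch, assuming a plain, non-nested
-        field at path ('foo',) and no translated None values), the prepared
-        filter {'is': ['bar']} becomes roughly::
-
-            Q('bool', must=[
-                Q('constant_score',
-                  filter=Q('terms', **{'foo.keyword': ['bar']}))
-            ])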
- """ - filter_list = [] - for field_path, relation_and_values in self.prepared_filters.items(): - if field_path not in skip_field_paths: - relation, values = one(relation_and_values.items()) - # Note that `is_not` is only used internally (for filtering by - # inaccessible sources) - if relation in ('is', 'is_not'): - field_type = self.service.field_type(self.catalog, field_path) - if isinstance(field_type, Nested): - term_queries = [] - for nested_field, nested_value in one(values).items(): - nested_body = {dotted(field_path, nested_field, 'keyword'): nested_value} - term_queries.append(Q('term', **nested_body)) - query = Q('nested', path=dotted(field_path), query=Q('bool', must=term_queries)) - else: - query = Q('terms', **{dotted(field_path, 'keyword'): values}) - translated_none = field_type.to_index(None) - if translated_none in values: - # Note that at this point None values in filters have already - # been translated e.g. {'is': ['~null']} and if the filter has a - # None our query needs to find fields with None values as well - # as absent fields - absent_query = Q('bool', must_not=[Q('exists', field=dotted(field_path))]) - query = Q('bool', should=[query, absent_query]) - if relation == 'is_not': - query = Q('bool', must_not=[query]) - filter_list.append(query) - elif relation in ('contains', 'within', 'intersects'): - for value in values: - value = value | {'relation': relation} - filter_list.append(Q('range', **{dotted(field_path): value})) - else: - assert False - - # Each iteration will AND the contents of the list - query_list = [Q('constant_score', filter=f) for f in filter_list] - - return Q('bool', must=query_list) - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True) -class AggregationStage(_ElasticsearchStage[MutableJSON, MutableJSON]): - """ - Cooperate with the given filter stage to augment the request with an - `aggregation` property containing an aggregation for each of the facet - fields configured in the current metadata plugin. If this aggregation stage - is to be part of a chain, the chain should include the given filter stage. - """ - filter_stage: FilterStage - - @classmethod - def create_and_wrap(cls, - chain: ElasticsearchChain[R0, MutableJSON] - ) -> ElasticsearchChain[R0, MutableJSON]: - """ - Creates and adds an aggregation stage to the specified chain. The chain - must contain a filter stage. - """ - filter_stage = one(s for s in chain.stages() if isinstance(s, FilterStage)) - aggregation_stage = cls(service=filter_stage.service, - catalog=filter_stage.catalog, - entity_type=filter_stage.entity_type, - filter_stage=filter_stage) - return aggregation_stage.wrap(chain) - - def prepare_request(self, request: Search) -> Search: - field_mapping = self.plugin.field_mapping - for facet in self.plugin.facets: - # FIXME: Aggregation filters may be redundant when post_filter is false - # https://github.com/DataBiosphere/azul/issues/3435 - aggregate = self._prepare_aggregation(facet=facet, - facet_path=field_mapping[facet]) - request.aggs.bucket(facet, aggregate) - self._annotate_aggs_for_translation(request) - return request - - def process_response(self, response: MutableJSON) -> MutableJSON: - try: - aggs = response['aggregations'] - except KeyError: - pass - else: - self._flatten_nested_aggs(aggs) - self._translate_response_aggs(aggs) - self._populate_accessible(aggs) - return response - - def _prepare_aggregation(self, *, facet: str, facet_path: FieldPath) -> Agg: - """ - Creates an aggregation to be used in an Elasticsearch search request. 
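-
-        For a plain (non-nested) facet, the returned aggregation looks
-        roughly like this (an illustrative sketch, with placeholders for
-        values that depend on the request)::
-
-            {
-                'filter': <query for all filters except this facet>,
-                'aggs': {
-                    'myTerms': {
-                        'terms': {'field': '<facet path>.keyword', 'size': ...}
-                    },
-                    'untagged': {
-                        'missing': {'field': '<facet path>.keyword'}
-                    }
-                }
-            }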
- """ - # Create a filter agg using a query that represents all filters - # except for the current facet. - query = self.filter_stage.prepare_query(skip_field_paths=(facet_path,)) - agg = A('filter', query) - - field_type = self.service.field_type(self.catalog, facet_path) - if isinstance(field_type, Nested): - nested_agg = agg.bucket(name='nested', - agg_type='nested', - path=dotted(facet_path)) - facet_path = dotted(facet_path, field_type.agg_property) - else: - nested_agg = agg - # Make an inner agg that will contain the terms in question - path = dotted(facet_path, 'keyword') - # FIXME: Approximation errors for terms aggregation are unchecked - # https://github.com/DataBiosphere/azul/issues/3413 - nested_agg.bucket(name='myTerms', - agg_type='terms', - field=path, - size=config.terms_aggregation_size) - nested_agg.bucket('untagged', 'missing', field=path) - return agg - - def _annotate_aggs_for_translation(self, request: Search): - """ - Annotate the aggregations in the given Elasticsearch search request so - we can later translate substitutes for None in the aggregations part of - the response. - """ - - def annotate(agg: Agg): - if isinstance(agg, Terms): - path = agg.field.split('.') - if path[-1] == 'keyword': - path.pop() - if not hasattr(agg, 'meta'): - agg.meta = {} - agg.meta['path'] = path - if hasattr(agg, 'aggs'): - subs = agg.aggs - for sub_name in subs: - annotate(subs[sub_name]) - - for agg_name in request.aggs: - annotate(request.aggs[agg_name]) - - def _flatten_nested_aggs(self, aggs: MutableJSON): - for facet, agg in aggs.items(): - try: - nested_agg = agg.pop('nested') - except KeyError: - pass - else: - agg.update(nested_agg) - - def _translate_response_aggs(self, aggs: MutableJSON): - """ - Translate substitutes for None in the aggregations part of an - Elasticsearch response. - """ - - def translate(k, v: MutableJSON): - try: - buckets = v['buckets'] - except KeyError: - for k, v in v.items(): - if isinstance(v, dict): - translate(k, v) - else: - try: - path = v['meta']['path'] - except KeyError: - pass - else: - field_type = self.service.field_type(self.catalog, tuple(path)) - for bucket in buckets: - bucket['key'] = field_type.from_index(bucket['key']) - translate(k, bucket) - - for k, v in aggs.items(): - translate(k, v) - - def _populate_accessible(self, aggs: MutableJSON) -> None: - # Because the value of the `accessible` field depends on the provided - # authentication, we have to synthesize the field and its corresponding - # facet from the `sourceId` field. - source_ids = self.filter_stage.filters.source_ids - plugin = self.service.metadata_plugin(self.catalog) - special_fields = plugin.special_fields - agg = aggs.pop(special_fields.source_id) - counts_by_accessibility: dict[bool, int] = defaultdict(int) - for bucket in agg['myTerms']['buckets']: - accessible = bucket['key'] in source_ids - counts_by_accessibility[accessible] += bucket['doc_count'] - agg['myTerms']['buckets'] = [ - {'key': accessible, 'doc_count': count} - for accessible, count in counts_by_accessibility.items() - ] - aggs[special_fields.accessible] = agg - - -@attr.s(frozen=True, auto_attribs=True, kw_only=True) -class SlicingStage(_ElasticsearchStage[Response, Response]): - """ - Augments the request with a document slice (known as a *source filter* in - Elasticsearch land) to restrict the set of properties in each hit in the - response. If the given document slice is None, the default one from the - plugin is used. If that is None, too, each hit will contain all properties. 
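-
-    A document slice takes the form of an Elasticsearch source filter, for
-    example (an illustrative sketch with hypothetical field paths)::
-
-        {'includes': ['contents.files.*'], 'excludes': ['bundles']}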
-    """
-    document_slice: DocumentSlice | None
-
-    def prepare_request(self, request: Search) -> Search:
-        document_slice = self._prepared_slice()
-        if document_slice is not None:
-            request = request.source(**document_slice)
-        return request
-
-    def process_response(self, response: Response) -> Response:
-        return response
-
-    def _prepared_slice(self) -> DocumentSlice | None:
-        if self.document_slice is None:
-            return self.plugin.document_slice(self.entity_type)
-        else:
-            return self.document_slice
-
-
-# FIXME: Eliminate reliance on Elasticsearch DSL
-#        https://github.com/DataBiosphere/azul/issues/4111
-
-@attr.s(frozen=True, auto_attribs=True, kw_only=True)
-class ToDictStage(_ElasticsearchStage[Response, MutableJSON]):
-
-    def prepare_request(self, request: Search) -> Search:
-        return request
-
-    def process_response(self, response: Response) -> MutableJSON:
-        return response.to_dict()
-
-
-SortKey = tuple[Any, str]
-
-
-@attr.s(auto_attribs=True, kw_only=True, frozen=True)
-class Pagination:
-    order: str
-    size: int
-    sort: str
-    search_before: SortKey | None = None
-    search_after: SortKey | None = None
-
-    def __attrs_post_init__(self):
-        self._check_sort_key(self.search_before)
-        self._check_sort_key(self.search_after)
-
-    def _check_sort_key(self, sort_key):
-        if sort_key is not None:
-            assert isinstance(sort_key, tuple), R(
-                'Not a tuple', sort_key)
-            assert len(sort_key) == 2, R(
-                'Not a tuple with two elements', sort_key)
-            assert isinstance(sort_key[1], str), R(
-                'Second sort key element not a string', sort_key)
-
-    def advance(self,
-                *,
-                search_before: SortKey | None,
-                search_after: SortKey | None
-                ) -> Self:
-        return attr.evolve(self,
-                           search_before=search_before,
-                           search_after=search_after)
-
-    def link(self, *, previous: bool, **params: str) -> str | None:
-        """
-        Return the URL of the next or previous page in this pagination or None
-        if there is no such page.
-
-        :param previous: True for a link to the previous page, False for a
-                         link to the next one.
-
-        :param params: Additional query parameters to embed in the URL
-        """
-        return None
-
-
-class ResponsePagination(TypedDict):
-    count: int
-    total: int
-    size: int
-    pages: int
-    next: str | None
-    previous: str | None
-    sort: str
-    order: str
-
-
-ResponseTriple = tuple[JSONs, ResponsePagination, JSON]
-
-
-@attr.s(frozen=True, auto_attribs=True, kw_only=True)
-class PaginationStage(_ElasticsearchStage[JSON, ResponseTriple]):
-    """
-    Handles the pagination of search results.
-    """
-    pagination: Pagination
-
-    #: If True, request one more hit so that _generate_paging_dict can know if
-    #: there is another page. Use this to prevent a last page that's empty.
-    peek_ahead: bool
-
-    filters: Filters
-
-    def prepare_request(self, request: Search) -> Search:
-        sort_order = self.pagination.order
-        sort_field = self.plugin.field_mapping[self.pagination.sort]
-        field_type = self.service.field_type(self.catalog, sort_field)
-        sort_mode = field_type.es_sort_mode
-        sort_field = dotted(sort_field, 'keyword')
-
-        def sort(order):
-            assert order in ('asc', 'desc'), order
-            return (
-                {
-                    sort_field: {
-                        'order': order,
-                        'mode': sort_mode,
-                        'missing': '_last' if order == 'asc' else '_first',
-                        **(
-                            {}
-                            if field_type.es_type is None else
-                            {'unmapped_type': field_type.es_type}
-                        )
-                    }
-                },
-                # This secondary sort field serves as the tiebreaker for when
-                # the primary sort field is not unique across documents.
-                # Otherwise it's redundant, especially if it's the same as the
-                # primary sort field.
However, always having a secondary - # simplifies the code and most real-world use cases use sort - # fields that are not unique. - { - 'entity_id.keyword': { - 'order': order - } - } - ) - - # Using search_after/search_before pagination - if self.pagination.search_after is not None: - request = request.extra(search_after=self.pagination.search_after) - request = request.sort(*sort(sort_order)) - elif self.pagination.search_before is not None: - request = request.extra(search_after=self.pagination.search_before) - rev_order = 'asc' if sort_order == 'desc' else 'desc' - request = request.sort(*sort(rev_order)) - else: - request = request.sort(*sort(sort_order)) - - # FIXME: Remove this or change to 10000 (the default) - # https://github.com/DataBiosphere/azul/issues/3770 - request = request.extra(track_total_hits=True) - - assert isinstance(self.peek_ahead, bool), type(self.peek_ahead) - # fetch one more than needed to see if there's a "next page". - request = request.extra(size=self.pagination.size + self.peek_ahead) - - return request - - def process_response(self, response: JSON) -> ResponseTriple: - """ - Returns hits and pagination as dict - """ - # The slice is necessary because we may have fetched an extra entry to - # determine if there is a previous or next page. - hits = self._extract_hits(response) - hits = self._translate_hits(hits) - pagination = self._process_pagination(response) - aggregations = response.get('aggregations', {}) - return hits, pagination, aggregations - - def _extract_hits(self, response): - hits = response['hits']['hits'][0:self.pagination.size] - if self.pagination.search_before is not None: - hits = reversed(hits) - hits = [hit['_source'] for hit in hits] - return hits - - def _translate_hits(self, hits): - hits = self.service.translate_fields(self.catalog, hits, forward=False) - return hits - - def _process_pagination(self, response: JSON) -> MutableJSON: - total = response['hits']['total'] - # FIXME: Handle other relations - # https://github.com/DataBiosphere/azul/issues/3770 - assert total['relation'] == 'eq' - pages = -(-total['value'] // self.pagination.size) - - # ... else use search_after/search_before pagination - hits: JSONs = response['hits']['hits'] - count = len(hits) - if self.pagination.search_before is None: - # hits are normal sorted - if count > self.pagination.size: - # There is an extra hit, indicating a next page. - count -= 1 - search_after = tuple(hits[count - 1]['sort']) - else: - # No next page - search_after = None - if self.pagination.search_after is not None: - search_before = tuple(hits[0]['sort']) - else: - search_before = None - else: - # hits are reverse sorted - if count > self.pagination.size: - # There is an extra hit, indicating a previous page. 
- count -= 1 - search_before = tuple(hits[count - 1]['sort']) - else: - # No previous page - search_before = None - search_after = tuple(hits[0]['sort']) - - pagination = self.pagination.advance(search_before=search_before, - search_after=search_after) - - def page_link(*, previous): - url = pagination.link(previous=previous, - catalog=self.catalog, - filters=json.dumps(self.filters.explicit)) - return None if url is None else str(url) - - return ResponsePagination(count=count, - total=total['value'], - size=pagination.size, - next=page_link(previous=False), - previous=page_link(previous=True), - pages=pages, - sort=pagination.sort, - order=pagination.order) - - -class ElasticsearchService(DocumentService): - - @cached_property - def _es_client(self) -> OpenSearch: - return ESClientFactory.get() - - def create_chain(self, - *, - catalog: CatalogName, - entity_type: str, - filters: Filters, - post_filter: bool, - document_slice: DocumentSlice | None - ) -> ElasticsearchChain[Response, Response]: - """ - Create a chain for a basic Elasticsearch `search` request for documents - matching the given filter, optionally restricting the set of properties - returned for each matching document. - """ - plugin = self.metadata_plugin(catalog) - - # noinspection PyArgumentList - chain = plugin.filter_stage(service=self, - catalog=catalog, - entity_type=entity_type, - filters=filters, - post_filter=post_filter) - chain = SlicingStage(service=self, - catalog=catalog, - entity_type=entity_type, - document_slice=document_slice).wrap(chain) - return chain - - def create_request(self, - catalog: CatalogName, - entity_type: str, - doc_type: DocumentType = DocumentType.aggregate - ) -> Search: - """ - Create an Elasticsearch request against the index containing documents - of the given entity and document types, in the given catalog. 
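-
-        A typical flow (an illustrative sketch) pairs such a request with a
-        chain from `create_chain`::
-
-            chain = service.create_chain(catalog=catalog,
-                                         entity_type='files',
-                                         filters=filters,
-                                         post_filter=True,
-                                         document_slice=None)
-            request = service.create_request(catalog, 'files')
-            request = chain.prepare_request(request)
-            response = chain.process_response(request.execute())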
- """ - return Search(using=self._es_client, - index=str(IndexName.create(catalog=catalog, - qualifier=entity_type, - doc_type=doc_type))) diff --git a/src/azul/service/lambda_iam_policy.py b/src/azul/service/lambda_iam_policy.py deleted file mode 100644 index 5947099bb0..0000000000 --- a/src/azul/service/lambda_iam_policy.py +++ /dev/null @@ -1,212 +0,0 @@ -from azul import ( - config, -) -from azul.collections import ( - alist, -) -from azul.deployment import ( - aws, -) -from azul.modules import ( - load_app_module, -) -from azul.terraform import ( - chalice, -) - -direct_access_role = config.dss_direct_access_role('service') -service = load_app_module('service') - -policy = { - 'Version': '2012-10-17', - 'Statement': [ - { - 'Effect': 'Allow', - 'Action': [ - 'logs:CreateLogGroup', - 'logs:CreateLogStream', - 'logs:PutLogEvents' - ], - 'Resource': 'arn:aws:logs:*:*:*' - }, - { - 'Effect': 'Allow', - 'Action': [ - 'es:ESHttpDelete', - 'es:ESHttpGet', - 'es:ESHttpHead', - 'es:ESHttpPut', - 'es:ESHttpPost', - 'es:ESHttpDelete' - ], - 'Resource': f'arn:aws:es:{aws.region_name}:{aws.account}:domain/{config.es_domain}/*' - }, - { - 'Effect': 'Allow', - 'Action': [ - 'es:DescribeElasticsearchDomain' - ], - 'Resource': f'arn:aws:es:{aws.region_name}:{aws.account}:domain/{config.es_domain}' - }, - { - 'Effect': 'Allow', - 'Action': [ - 'sqs:GetQueueAttributes', - 'sqs:GetQueueUrl', - ], - 'Resource': [ - f'arn:aws:sqs:{aws.region_name}:{aws.account}:{name}' - for name in config.all_queue_names - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 'secretsmanager:GetSecretValue' - ], - 'Resource': [ - f'arn:aws:secretsmanager:{aws.region_name}:{aws.account}:secret:*' - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 's3:PutObject', - 's3:GetObject', - 's3:PutObjectAcl', - 's3:PutObjectTagging', - 's3:GetObjectTagging' - ], - 'Resource': [ - '${aws_s3_bucket.%s.arn}/*' % config.storage_term, - f'arn:aws:s3:::{aws.shared_bucket}/*' - ] - }, - # Needed for GetObject to work in versioned bucket - { - 'Effect': 'Allow', - 'Action': [ - 's3:GetObjectVersion' - ], - 'Resource': [ - f'arn:aws:s3:::{aws.shared_bucket}/*' - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 's3:ListBucket' # Without this, GetObject and HeadObject yield 403 for missing keys, not 404 - ], - 'Resource': [ - '${aws_s3_bucket.%s.arn}' % config.storage_term, - f'arn:aws:s3:::{aws.shared_bucket}' - ] - }, - { - 'Effect': 'Allow', - 'Action': [ - 's3:GetObject', - 's3:ListBucket' - ], - 'Resource': [ - f'arn:aws:s3:::{resource}' - for bucket in alist(aws.mirror_bucket, config.mirror_bucket) - for resource in [bucket, f'{bucket}/*'] - ] - }, - *( - [ - # Remove once https://github.com/HumanCellAtlas/data-store/issues/1837 is resolved - { - 'Effect': 'Allow', - 'Action': [ - 's3:GetObject', - ], - 'Resource': [ - f'arn:aws:s3:::{aws.dss_checkout_bucket(config.dss_endpoint)}/*', - ] - }, - # Remove once https://github.com/HumanCellAtlas/data-store/issues/1837 is resolved - { - 'Effect': 'Allow', - 'Action': [ - 's3:ListBucket' - # Without this, GetObject and HeadObject yield 403 for missing keys, not 404 - ], - 'Resource': [ - f'arn:aws:s3:::{aws.dss_checkout_bucket(config.dss_endpoint)}' - ] - } - ] if config.dss_endpoint else [] - ), - { - 'Effect': 'Allow', - 'Action': [ - 'dynamodb:Query', - 'dynamodb:GetItem', - 'dynamodb:PutItem', - 'dynamodb:UpdateItem', - 'dynamodb:DeleteItem', - 'dynamodb:BatchWriteItem', - 'dynamodb:DescribeTable' - ], - 'Resource': [ - f'arn:aws:dynamodb:{aws.region_name}:{aws.account}:table/{table_name}' - for 
table_name in (
- config.dynamo_object_version_table_name,
- config.dynamo_sources_cache_table_name
- )
- ]
- },
- {
- 'Effect': 'Allow',
- 'Action': [
- 'states:StartExecution'
- ],
- 'Resource': [
- f'arn:aws:states:{aws.region_name}:{aws.account}:stateMachine:'
- f'{config.qualified_resource_name(config.manifest_sfn)}'
- ]
- },
- {
- 'Effect': 'Allow',
- 'Action': [
- 'states:DescribeExecution'
- ],
- 'Resource': [
- f'arn:aws:states:{aws.region_name}:{aws.account}:execution:'
- f'{config.qualified_resource_name(config.manifest_sfn)}*'
- ]
- },
- {
- 'Effect': 'Allow',
- 'Action': [
- 'kms:GenerateMac',
- 'kms:VerifyMac'
- ],
- 'Resource': [
- '${aws_kms_key.%s.arn}' % config.manifest_kms_key_tf_name
- ]
- },
- {
- 'Effect': 'Allow',
- 'Action': [
- 'ssm:GetParameter'
- ],
- 'Resource': [
- f'arn:aws:ssm:{aws.region_name}:{aws.account}:parameter/dcp/*'
- ]
- },
- *(
- [
- {
- 'Effect': 'Allow',
- 'Action': 'sts:AssumeRole',
- 'Resource': direct_access_role
- }
- ] if direct_access_role is not None else [
- ]
- ),
- *chalice.vpc_lambda_iam_policy()
- ]
-}
diff --git a/src/azul/service/manifest_controller.py b/src/azul/service/manifest_controller.py
deleted file mode 100644
index 72cc9f67f8..0000000000
--- a/src/azul/service/manifest_controller.py
+++ /dev/null
@@ -1,302 +0,0 @@
-from collections.abc import (
- Mapping,
-)
-from typing import (
- TypedDict,
- cast,
- get_type_hints,
-)
-
-import attr
-from chalice import (
- BadRequestError,
- ChaliceViewError,
- Response,
-)
-from furl import (
- furl,
-)
-
-from azul import (
- cached_property,
- config,
-)
-from azul.auth import (
- Authentication,
-)
-from azul.chalice import (
- GoneError,
-)
-from azul.plugins import (
- ManifestFormat,
-)
-from azul.service import (
- Filters,
-)
-from azul.service.async_manifest_service import (
- AsyncManifestService,
- GenerationFailed,
- GenerationFinished,
- InvalidTokenError,
- NoSuchGeneration,
- Token,
-)
-from azul.service.manifest_service import (
- CachedManifestNotFound,
- InvalidManifestKey,
- InvalidManifestKeySignature,
- Manifest,
- ManifestKey,
- ManifestPartition,
- ManifestService,
- ManifestUrlFunc,
- SignedManifestKey,
-)
-from azul.service.source_controller import (
- SourceController,
-)
-from azul.service.storage_service import (
- StorageService,
-)
-from azul.types import (
- FlatJSON,
- JSON,
-)
-
-manifest_state_key = 'manifest'
-
-
-class ManifestGenerationState(TypedDict, total=False):
- manifest_key: JSON
- filters: JSON
- partition: JSON | None
- manifest: JSON | None
-
-
-assert manifest_state_key in get_type_hints(ManifestGenerationState)
-
-
-@attr.s(frozen=True, auto_attribs=True, kw_only=True)
-class ManifestController(SourceController):
- manifest_url_func: ManifestUrlFunc
-
- @cached_property
- def async_service(self) -> AsyncManifestService:
- return AsyncManifestService()
-
- @cached_property
- def service(self) -> ManifestService:
- return ManifestService(StorageService(), self.file_url_func)
-
- def get_manifest(self, state: JSON) -> ManifestGenerationState:
- # We trust StepFunctions to pass us valid state
- state: ManifestGenerationState
- partition = ManifestPartition.from_json(state['partition'])
- manifest_key = ManifestKey.from_json(state['manifest_key'])
- result = self.service.get_manifest(format=manifest_key.format,
- catalog=manifest_key.catalog,
- filters=Filters.from_json(state['filters']),
- partition=partition,
- manifest_key=manifest_key)
- if isinstance(result, ManifestPartition):
- assert not result.is_last, result
- return {
- **state,
- 'partition':
result.to_json()
- }
- elif isinstance(result, Manifest):
- return {
- # The presence of this key terminates the step function loop
- 'manifest': result.to_json()
- }
- else:
- assert False, type(result)
-
- def _unpack_token_or_key(self,
- token_or_key: str | None
- ) -> tuple[Token | None, SignedManifestKey | None]:
- if token_or_key is None:
- return None, None
- else:
- try:
- return Token.decode(token_or_key), None
- except InvalidTokenError:
- try:
- return None, SignedManifestKey.decode(token_or_key)
- except InvalidManifestKey:
- # The OpenAPI spec doesn't distinguish key and token
- raise BadRequestError('Invalid token')
-
- def _start_execution(self,
- filters: Filters,
- manifest_key: ManifestKey,
- previous_token: Token | None = None,
- ) -> Token:
- partition = ManifestPartition.first()
- state: ManifestGenerationState = {
- 'filters': filters.to_json(),
- 'manifest_key': manifest_key.to_json(),
- 'partition': partition.to_json()
- }
- # Manifest keys for catalogs with long names would be too long to be
- # used directly as state machine execution names.
- generation_id = manifest_key.uuid
- # ManifestGenerationState is also JSON but there is no way to express
- # that since TypedDict rejects a co-parent class.
- input = cast(JSON, state)
- next_iteration = 0 if previous_token is None else previous_token.iteration + 1
- for i in range(10):
- try:
- return self.async_service.start_generation(generation_id,
- input,
- iteration=next_iteration + i)
- except GenerationFinished:
- pass
- raise ChaliceViewError('Too many executions of this manifest generation')
-
- def get_manifest_async(self,
- *,
- token_or_key: str | None,
- query_params: Mapping[str, str],
- fetch: bool,
- authentication: Authentication | None):
-
- token, manifest_key = self._unpack_token_or_key(token_or_key)
-
- if token is None:
- if manifest_key is None:
- # Neither a token representing an ongoing execution was given,
- # nor the key of an already cached manifest. There could still
- # be a cached manifest, so we'll need to look it up.
- format = ManifestFormat(query_params['format'])
- catalog = query_params.get('catalog', config.default_catalog)
- filters = self.get_filters(catalog, authentication, query_params.get('filters'))
- try:
- manifest = self.service.get_cached_manifest(format=format,
- catalog=catalog,
- filters=filters)
- except CachedManifestNotFound as e:
- # A cache miss, but the exception tells us the cache key
- manifest, manifest_key = None, e.manifest_key
- # Prepare the execution that will generate the manifest
- token = self._start_execution(filters=filters,
- manifest_key=manifest_key)
- else:
- # A cache hit
- manifest_key = manifest.manifest_key
- else:
- # The client passed the key of a cached manifest, originating
- # from the final 302 response to a fetch request for a curl
- # manifest (see below).
- if fetch:
- raise BadRequestError('The fetch endpoint does not support a manifest key')
- if authentication is not None:
- raise BadRequestError('Must omit authentication when passing a manifest key')
- try:
- manifest_key = self.service.verify_manifest_key(manifest_key)
- manifest = self.service.get_cached_manifest_with_key(manifest_key)
- except CachedManifestNotFound:
- # We could start another execution but that would require
- # the client to follow more redirects. We've already sent
- # the final 302 so we shouldn't do that.
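- # Responding with 410 (Gone) marks the failure as permanent,
- # telling the client to start over with a fresh manifest request.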
- raise GoneError('The manifest has expired, please request a new one')
- except InvalidManifestKeySignature:
- raise BadRequestError('Invalid token')
- else:
- # A token for an ongoing execution was given
- assert manifest_key is None, manifest_key
- try:
- token_or_result = self.async_service.inspect_generation(token)
- except NoSuchGeneration:
- raise BadRequestError('Invalid token')
- except GenerationFailed as e:
- raise ChaliceViewError('Failed to generate manifest', e.status, e.output)
- if isinstance(token_or_result, Token):
- # Execution is still ongoing, we got an updated token
- token, manifest, manifest_key = token_or_result, None, None
- elif isinstance(token_or_result, dict):
- # The execution is done, the resulting manifest should be ready
- result = token_or_result
- manifest = Manifest.from_json(result['output']['manifest'])
- manifest_key = manifest.manifest_key
- try:
- manifest = self.service.get_cached_manifest_with_key(manifest_key)
- except CachedManifestNotFound as e:
- assert manifest_key == e.manifest_key
- # There are two possible causes for the missing manifest: it
- # may have expired, in which case the supplied token must be
- # really stale, or the manifest was deleted immediately
- # after it was created. We haven't sent a 302 redirect yet,
- # so we'll just restart the generation by starting another
- # execution for it.
- manifest = None
- filters = Filters.from_json(result['input']['filters'])
- token = self._start_execution(filters=filters,
- manifest_key=manifest_key,
- previous_token=token)
- else:
- assert manifest_key == manifest.manifest_key
- else:
- assert False, token_or_result
-
- body: dict[str, int | str | FlatJSON]
-
- if manifest is None:
- assert token is not None
- url = self.manifest_url_func(fetch=fetch, token_or_key=token.encode())
- body = {
- 'Status': 301,
- 'Location': str(url),
- 'Retry-After': token.retry_after
- }
- else:
- assert manifest.manifest_key == manifest_key
- # The manifest is ultimately downloaded via a signed URL that points
- # to the storage bucket. This signed URL expires after one hour,
- # which is desirable because it is a client secret and its short
- # lifespan reduces the risk of it being shared. However, this also
- # makes it unsuitable for cURL downloads that may need to be retried
- # over longer timespans
- # (https://github.com/DataBiosphere/azul/issues/2875).
- # To allow for cURL manifests to remain valid for longer than 1
- # hour, we instead return a 301 redirect to the non-fetch
- # `/manifest/files` endpoint with the object key of the cached
- # manifest specified as a query parameter. This object key is also a
- # client secret; it is mutually exclusive with OAuth tokens and
- # allows for the cached manifest to be downloaded without
- # authentication for as long as the cached manifest persists in S3.
- # This increases the risk of the secret being shared, but is
- # necessary to preserve the functionality of the cURL download.
- if fetch and manifest.format is ManifestFormat.curl:
- # For AnVIL, we are prohibited from exposing a manifest URL that
- # remains valid for longer than 1 hour. Currently, the AnVIL
- # plugin does not support cURL-format manifests.
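- # The assertion below enforces that constraint at runtime: signing
- # a key here would mint exactly the kind of long-lived URL that is
- # disallowed for AnVIL catalogs.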
- assert not config.is_anvil_enabled(manifest_key.catalog) - manifest_key = self.service.sign_manifest_key(manifest_key) - url = self.manifest_url_func(fetch=False, token_or_key=manifest_key.encode()) - else: - url = furl(self.service.get_manifest_url(manifest)) - body = { - 'Status': 302, - 'Location': str(url), - 'CommandLine': self.service.command_lines(manifest, url, authentication) - } - - # Note: Response objects returned without a 'Content-Type' header will - # be given one of type 'application/json' as default by Chalice. - # https://aws.github.io/chalice/tutorials/basicrestapi.html#customizing-the-http-response - - if fetch: - return Response(body=body) - else: - status = body.pop('Status') - command_line: FlatJSON = body.pop('CommandLine', None) - headers = {k: str(v) for k, v in body.items()} - if command_line is None: - new_body = None - else: - headers['Content-Type'] = 'text/plain' - new_body = ''.join( - f'\nDownload the manifest in {shell} with `curl` using:\n\n{cmd}\n' - for shell, cmd in command_line.items() - ) - return Response(body=new_body, status_code=status, headers=headers) diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py deleted file mode 100644 index 4cba0c10d7..0000000000 --- a/src/azul/service/manifest_service.py +++ /dev/null @@ -1,2095 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -import base64 -from collections.abc import ( - Iterable, - Mapping, -) -import csv -from datetime import ( - datetime, -) -from inspect import ( - isabstract, -) -from io import ( - BytesIO, - TextIOWrapper, -) -import itertools -from itertools import ( - chain, -) -import json -import logging -from math import ( - ceil, -) -from operator import ( - itemgetter, -) -import os -import re -import shlex -from tempfile import ( - mkstemp, -) -import time -from typing import ( - Callable, - ClassVar, - IO, - Protocol, - Self, - cast, -) -import unicodedata -from uuid import ( - UUID, - uuid5, -) - -import attrs -from furl import ( - furl, -) -from more_itertools import ( - always_iterable, - chunked, - one, -) -import msgpack -from opensearchpy import ( - Q, - Search, -) -from opensearchpy.helpers.response import ( - Hit, -) - -from azul import ( - CatalogName, - R, - cached_property, - config, - mutable_furl, -) -from azul.attrs import ( - is_uuid, - strict_auto, -) -from azul.auth import ( - Authentication, -) -from azul.bytes import ( - azul_urlsafe_b64decode, - azul_urlsafe_b64encode, -) -from azul.collections import ( - getitem, -) -from azul.deployment import ( - aws, -) -from azul.indexer import ( - SourceSpec, -) -from azul.indexer.document import ( - DocumentType, - FieldPath, -) -from azul.indexer.field import ( - FieldTypes, - null_str, -) -from azul.json import ( - copy_json, -) -from azul.json_freeze import ( - freeze, - sort_frozen, -) -from azul.plugins import ( - ColumnMapping, - DocumentSlice, - ManifestConfig, - ManifestFormat, - MetadataPlugin, - RepositoryPlugin, - dotted, -) -from azul.service import ( - FileUrlFunc, - Filters, - avro_pfb, -) -from azul.service.avro_pfb import ( - PFBRelation, -) -from azul.service.elasticsearch_service import ( - ElasticsearchChain, - ElasticsearchService, - Pagination, - PaginationStage, - SortKey, - ToDictStage, -) -from azul.service.storage_service import ( - AWS_S3_DEFAULT_MINIMUM_PART_SIZE, - StorageObjectNotFound, - StorageService, -) -from azul.strings import ( - double_quote as dq, -) -from azul.types import ( - AnyJSON, - FlatJSON, - JSON, - JSONs, - MutableJSON, -) -from 
azul.uuids import (
- uuid5_for_bytes,
-)
-from azul.vendored.frozendict import (
- frozendict,
-)
-
-log = logging.getLogger(__name__)
-
-
-class ManifestUrlFunc(Protocol):
-
- def __call__(self,
- *,
- fetch: bool = True,
- token_or_key: str | None = None,
- **params: str
- ) -> mutable_furl: ...
-
-
-@attrs.frozen
-class InvalidManifestKey(Exception):
- value: str
-
-
-class AbstractManifestKey(metaclass=ABCMeta):
- """
- The root of the manifest key class hierarchy. The hierarchy expresses the
- basic security constraints on manifest keys as they are sent through
- potentially insecure channels. This class defines the methods for
- (de)serializing a manifest key using a somewhat space-efficient
- binary "packed" representation.
- """
-
- @abstractmethod
- def pack(self) -> bytes:
- raise NotImplementedError
-
- def encode(self) -> str:
- return azul_urlsafe_b64encode(self.pack())
-
- @classmethod
- @abstractmethod
- def unpack(cls, pack: bytes) -> Self:
- raise NotImplementedError
-
- @classmethod
- def decode(cls, value: str) -> Self:
- try:
- return cls.unpack(azul_urlsafe_b64decode(value))
- except Exception as e:
- raise InvalidManifestKey(value) from e
-
-
-@attrs.frozen(kw_only=True)
-class BareManifestKey(AbstractManifestKey):
- """
- An untrusted manifest key. Instances can be freely serialized and
- deserialized but the service won't accept them. To obtain a key the service
- trusts, use an instance of :class:`ManifestKey` that was returned by the
- service.
-
- To send a manifest key through an untrusted channel, it must first be
- signed using :meth:`ManifestService.sign_manifest_key`. After reading it
- from the untrusted channel the signature must be verified using
- :meth:`ManifestService.verify_manifest_key`.
-
- >>> manifest_key = BareManifestKey(catalog='foo',
- ... format=ManifestFormat.curl,
- ... manifest_hash=UUID('d2b0ce3c-46f0-57fe-b9d4-2e38d8934fd4'),
- ... source_hash=UUID('77936747-5968-588e-809f-af842d6be9e0'))
-
- >>> manifest_key.encode()
- 'lKNmb2-kY3VybMQQ0rDOPEbwV_651C442JNP1MQQd5NnR1loWI6An6-ELWvp4A'
-
- The encode() method is the inverse of decode():
-
- >>> BareManifestKey.decode(manifest_key.encode()) == manifest_key
- True
-
- Invalid base64:
-
- >>> BareManifestKey.decode(manifest_key.encode()[:-1])
- ... # doctest: +NORMALIZE_WHITESPACE
- Traceback (most recent call last):
- ...
- azul.service.manifest_service.InvalidManifestKey:
- lKNmb2-kY3VybMQQ0rDOPEbwV_651C442JNP1MQQd5NnR1loWI6An6-ELWvp4
-
- Valid base64 encoding and msgpack format, but a value of the wrong type
- for the `catalog` attribute:
-
- >>> with attrs.validators.disabled():
- ... # noinspection PyTypeChecker
- ... bad_key = attrs.evolve(manifest_key, catalog=123).encode()
- >>> bad_key
- 'lHukY3VybMQQ0rDOPEbwV_651C442JNP1MQQd5NnR1loWI6An6-ELWvp4A'
-
- >>> BareManifestKey.decode(bad_key)
- ... # doctest: +NORMALIZE_WHITESPACE
- Traceback (most recent call last):
- ...
- azul.service.manifest_service.InvalidManifestKey:
- lHukY3VybMQQ0rDOPEbwV_651C442JNP1MQQd5NnR1loWI6An6-ELWvp4A
-
- >>> bad_key = base64.b64encode(manifest_key.pack() + b'123').decode()
- >>> BareManifestKey.decode(bad_key)
- ... # doctest: +NORMALIZE_WHITESPACE
- Traceback (most recent call last):
- ...
- azul.service.manifest_service.InvalidManifestKey:
- lKNmb2+kY3VybMQQ0rDOPEbwV/651C442JNP1MQQd5NnR1loWI6An6+ELWvp4DEyMw==
-
- >>> bad_key = base64.b64encode(manifest_key.pack()[:-1]).decode()
- >>> BareManifestKey.decode(bad_key)
- ...
# doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - azul.service.manifest_service.InvalidManifestKey: - lKNmb2+kY3VybMQQ0rDOPEbwV/651C442JNP1MQQd5NnR1loWI6An6+ELWvp - - Manifest keys contain the catalog name which can be quite long, extending - the length of the encoded manifest key proportionally by 4 characters for - every 3 catalog name characters. - - >>> manifest_key = BareManifestKey(catalog='a' * 64, - ... format=ManifestFormat.terra_pfb, - ... manifest_hash=UUID('d2b0ce3c-46f0-57fe-b9d4-2e38d8934fd4'), - ... source_hash=UUID('77936747-5968-588e-809f-af842d6be9e0')) - >>> len(manifest_key.encode()) - 151 - """ - catalog: CatalogName = strict_auto() - format: ManifestFormat = strict_auto() - manifest_hash: UUID = attrs.field(validator=is_uuid(5)) - source_hash: UUID = attrs.field(validator=is_uuid(5)) - - def pack(self) -> bytes: - return msgpack.packb([ - self.catalog, - self.format.value, - self.manifest_hash.bytes, - self.source_hash.bytes, - ]) - - @classmethod - def unpack(cls, pack: bytes) -> Self: - i = iter(msgpack.unpackb(pack)) - return cls(catalog=next(i), - format=ManifestFormat(next(i)), - manifest_hash=UUID(bytes=next(i)), - source_hash=UUID(bytes=next(i))) - - -@attrs.frozen(kw_only=True) -class SignedManifestKey(AbstractManifestKey): - """ - A bare manifest key and its signature. - - >>> bare_manifest_key = BareManifestKey(catalog='foo', - ... format=ManifestFormat.curl, - ... manifest_hash=UUID('d2b0ce3c-46f0-57fe-b9d4-2e38d8934fd4'), - ... source_hash=UUID('77936747-5968-588e-809f-af842d6be9e0')) - >>> manifest_key = SignedManifestKey(value=bare_manifest_key, - ... signature=b'123') - - >>> manifest_key.encode() - 'ksQulKNmb2-kY3VybMQQ0rDOPEbwV_651C442JNP1MQQd5NnR1loWI6An6-ELWvp4MQDMTIz' - - >>> SignedManifestKey.decode(manifest_key.encode()) == manifest_key - True - """ - value: BareManifestKey = strict_auto() - signature: bytes = strict_auto() - - def pack(self) -> bytes: - return msgpack.packb([ - self.value.pack(), - self.signature - ]) - - @classmethod - def unpack(cls, pack: bytes) -> Self: - i = iter(msgpack.unpackb(pack)) - return cls(value=BareManifestKey.unpack(next(i)), - signature=next(i)) - - -class ManifestKey(BareManifestKey): - """ - A manifest key that the service trusts implicitly. It is assumed to have - either been instantiated by the service itself and transmitted exclusively - over secure channels, or to have been extracted from a signed manifest key - after signature verification. - - >>> manifest_key = ManifestKey(catalog='foo', - ... format=ManifestFormat.curl, - ... manifest_hash=UUID('d2b0ce3c-46f0-57fe-b9d4-2e38d8934fd4'), - ... source_hash=UUID('77936747-5968-588e-809f-af842d6be9e0')) - - Encoded representation is short: - - >>> manifest_key.encode() - 'lKNmb2-kY3VybMQQ0rDOPEbwV_651C442JNP1MQQd5NnR1loWI6An6-ELWvp4A' - - It shouldn't be possible to deserialize a ManifestKey instance. - - >>> ManifestKey.decode(manifest_key.encode()) - ... # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - azul.service.manifest_service.InvalidManifestKey: - lKNmb2-kY3VybMQQ0rDOPEbwV_651C442JNP1MQQd5NnR1loWI6An6-ELWvp4A - - The from_json() method is the inverse of to_json(): - - >>> ManifestKey.from_json(manifest_key.to_json()) == manifest_key - True - """ - - @classmethod - def unpack(cls, pack: bytes) -> None: - """ - Do not call this method. It is unsafe to deserialize an instance of - this class. 
Instead, deserialize a :class:`SignedManifestKey` and use
- :meth:`ManifestService.verify_manifest_key`.
- """
- assert False
-
- def to_json(self) -> JSON:
- return {
- 'catalog': self.catalog,
- 'format': self.format.value,
- 'manifest_hash': str(self.manifest_hash),
- 'source_hash': str(self.source_hash)
- }
-
- @classmethod
- def from_json(cls, json: JSON) -> Self:
- return cls(catalog=json['catalog'],
- format=ManifestFormat(json['format']),
- manifest_hash=UUID(json['manifest_hash']),
- source_hash=UUID(json['source_hash']))
-
- _uuid_namespace: ClassVar[UUID] = UUID('c5a0cd95-44f7-4216-972f-623f00f8fd22')
-
- @property
- def uuid(self) -> UUID:
- return uuid5_for_bytes(self._uuid_namespace, self.pack())
-
-
-@attrs.frozen
-class InvalidManifestKeySignature(Exception):
- value: SignedManifestKey
-
-
-@attrs.frozen(kw_only=True)
-class Manifest:
- """
- Contains the details of a prepared manifest.
- """
- #: The S3 object key under which the manifest is stored in the storage
- #: bucket
- object_key: str
-
- #: True if an existing manifest was reused or False if a new manifest was
- #: generated.
- was_cached: bool
-
- #: The format of the manifest
- format: ManifestFormat
-
- #: Uniquely identifies this manifest
- manifest_key: ManifestKey
-
- #: The proposed file name of the manifest when downloading it to a user's
- #: system
- file_name: str
-
- def to_json(self) -> JSON:
- return {
- 'object_key': self.object_key,
- 'was_cached': self.was_cached,
- 'format': self.format.value,
- 'manifest_key': self.manifest_key.to_json(),
- 'file_name': self.file_name
- }
-
- @classmethod
- def from_json(cls, json: JSON) -> Self:
- return cls(object_key=json['object_key'],
- was_cached=json['was_cached'],
- format=ManifestFormat(json['format']),
- manifest_key=ManifestKey.from_json(json['manifest_key']),
- file_name=json['file_name'])
-
-
-def tuple_or_none(v):
- return v if v is None else tuple(v)
-
-
-@attrs.frozen(kw_only=True)
-class ManifestPartition:
- """
- A partial manifest. An instance of this class encapsulates the state that
- might need to be tracked while a manifest is populated, in increments of
- partitions, or even pages within partitions. The simplest manifests
- consist of just one big partition that's not split into pages. These
- monolithic manifests come at a price: the size of the manifest must be no
- more than what fits into memory at once.
- """
- #: The 0-based index of the partition
- index: int
-
- #: True if this is the last partition
- is_last: bool
-
- #: The file name to use for a manifest that contains this partition. While
- #: this attribute may seem misplaced, the file name is derived from the
- #: contents of the ES hits that make up the manifest rows. If a manifest is
- #: partitioned, we need to track the state of that derivation somewhere.
- #: On the last partition, this attribute is not None and represents the file
- #: name to be used. On the other partitions this attribute may be None; if
- #: it isn't, it represents the base name, the manifest content-dependent
- #: portion of the file name. If all pages of all partitions yield the same
- #: base name, the file name on the last partition will incorporate the base
- #: name. Otherwise, a generic, content-independent file name will be used.
- file_name: str | None = None
-
- #: The cached configuration of the manifest that contains this partition.
- #: Manifest generators whose `manifest_config` property is expensive should - #: cache the returned value here for subsequent partitions to reuse. - config: AnyJSON | None = None - - #: The ID of the S3 multi-part upload this partition is a part of. If a - #: manifest consists of just one partition, this may be None, but it doesn't - #: have to be. - multipart_upload_id: str | None = None - - #: The S3 ETag of each partition; the current one and all the ones before it - part_etags: tuple[str, ...] | None = attrs.field(converter=tuple_or_none, - default=None) - - #: The index of the current page. The index is zero-based and global. For - #: example, if the first partition contains five pages, the index of the - #: first page in the second partition is 5. This is None for manifests whose - #: partitions aren't split into pages. - page_index: int | None = None - - #: True if the current page is the last page of the entire manifest. This is - #: None for manifests whose partitions aren't split into pages. - is_last_page: bool | None = None - - #: The `sort` value of the first hit of the current page in this partition, - #: or None if there is no current page. - search_after: SortKey | None = None - - @classmethod - def from_json(cls, partition: JSON) -> Self: - return cls(**{ - k: tuple(v) if k == 'search_after' and v is not None else v - for k, v in partition.items() - }) - - def to_json(self) -> MutableJSON: - return attrs.asdict(self) - - @classmethod - def first(cls) -> Self: - return cls(index=0, - is_last=False) - - @property - def is_first(self) -> bool: - return not (self.index or self.page_index) - - def with_config(self, config: AnyJSON) -> Self: - return attrs.evolve(self, config=config) - - def with_upload(self, multipart_upload_id) -> Self: - return attrs.evolve(self, - multipart_upload_id=multipart_upload_id, - part_etags=()) - - def first_page(self) -> Self: - assert self.index == 0, self - return attrs.evolve(self, - page_index=0, - is_last_page=False) - - def next_page(self, - file_name: str | None, - search_after: SortKey | None - ) -> Self: - assert self.page_index is not None, self - # If different pages yield different file names, use default file name - if self.page_index > 0: - if file_name != self.file_name: - file_name = None - return attrs.evolve(self, - page_index=self.page_index + 1, - file_name=file_name, - search_after=search_after) - - def last_page(self) -> Self: - return attrs.evolve(self, is_last_page=True) - - def next(self, part_etag: str) -> Self: - return attrs.evolve(self, - index=self.index + 1, - part_etags=(*self.part_etags, part_etag)) - - def last(self, file_name: str) -> Self: - return attrs.evolve(self, - file_name=file_name, - is_last=True) - - -@attrs.frozen -class CachedManifestNotFound(Exception): - manifest_key: ManifestKey - - -class ManifestService(ElasticsearchService): - - def __init__(self, storage_service: StorageService, file_url_func: FileUrlFunc): - super().__init__() - self.storage_service = storage_service - self.file_url_func = file_url_func - - def get_manifest(self, - *, - format: ManifestFormat, - catalog: CatalogName, - filters: Filters, - partition: ManifestPartition, - manifest_key: ManifestKey | None = None - ) -> Manifest | ManifestPartition: - """ - Return a fully populated manifest that ends with the given partition or - the next partition if the given partition isn't the last. 
-
- If a manifest is returned, its 'location' attribute contains the
- pre-signed URL of a manifest in the given format, and containing file
- entities matching the given filter.
-
- If a suitable manifest already exists, it will be used and returned
- immediately. Otherwise, a new manifest will be generated. Subsequent
- invocations of this method with the same arguments are likely to reuse
- that manifest, skipping the time-consuming manifest generation.
-
- If a manifest needs to be generated and the generation involves multiple
- partitions, this method will only generate one partition and return
- the next one. Call this method repeatedly with the returned partition
- until the return value is a Manifest instance.
-
- :param format: The desired format of the manifest.
-
- :param catalog: The name of the catalog to generate the manifest from.
-
- :param filters: The filters by which to restrict the contents of the
- manifest.
-
- :param partition: The manifest partition to generate. Not all manifests
- involve multiple partitions. If they don't, a Manifest
- instance will be returned. Otherwise, the next
- ManifestPartition instance will be returned.
-
- :param manifest_key: An optional key identifying the cached manifest. If
- None, the key will be computed dynamically. This
- may take a few seconds. If a valid cached manifest
- exists under the given key, it will be used.
- Otherwise, a new manifest will be created and
- stored under the given key.
- """
- generator_cls = ManifestGenerator.cls_for_format(format)
- generator = generator_cls(self, catalog, filters)
- if manifest_key is None:
- manifest_key = generator.manifest_key()
- if partition.is_first:
- try:
- return self._get_cached_manifest(generator_cls, manifest_key)
- except CachedManifestNotFound:
- return self._generate_manifest(generator, manifest_key, partition)
- else:
- return self._generate_manifest(generator, manifest_key, partition)
-
- def _generate_manifest(self,
- generator: 'ManifestGenerator',
- manifest_key: ManifestKey,
- partition: ManifestPartition
- ) -> Manifest | ManifestPartition:
- partition = generator.write(manifest_key, partition)
- if partition.is_last:
- return self._make_manifest(generator_cls=type(generator),
- manifest_key=manifest_key,
- file_name=partition.file_name,
- was_cached=False)
- else:
- return partition
-
- def get_cached_manifest(self,
- format: ManifestFormat,
- catalog: CatalogName,
- filters: Filters
- ) -> Manifest:
- generator_cls = ManifestGenerator.cls_for_format(format)
- generator = generator_cls(self, catalog, filters)
- manifest_key = generator.manifest_key()
- return self._get_cached_manifest(generator_cls, manifest_key)
-
- @classmethod
- def sign_manifest_key(cls, manifest_key: ManifestKey) -> SignedManifestKey:
- """
- Sign the given manifest key with a secret so that it can later be
- verified to have not been tampered with.
- """
- response = aws.kms.generate_mac(Message=manifest_key.pack(),
- KeyId=config.manifest_kms_alias,
- MacAlgorithm='HMAC_SHA_256')
- return SignedManifestKey(value=manifest_key,
- signature=response['Mac'])
-
- @classmethod
- def verify_manifest_key(cls, manifest_key: SignedManifestKey) -> ManifestKey:
- """
- Verify a manifest key against its signature. If either the key or the
- signature has been tampered with, an exception will be raised.
- """ - try: - response = aws.kms.verify_mac(KeyId=config.manifest_kms_alias, - MacAlgorithm='HMAC_SHA_256', - Message=manifest_key.value.pack(), - Mac=manifest_key.signature) - except aws.kms.exceptions.KMSInvalidMacException: - raise InvalidManifestKeySignature(manifest_key) - else: - assert response['MacValid'] - return ManifestKey(**attrs.asdict(manifest_key.value)) - - def get_cached_manifest_with_key(self, manifest_key: ManifestKey) -> Manifest: - generator_cls = ManifestGenerator.cls_for_format(manifest_key.format) - return self._get_cached_manifest(generator_cls, manifest_key) - - def _get_cached_manifest(self, - generator_cls: type['ManifestGenerator'], - manifest_key: ManifestKey - ) -> Manifest: - file_name = self._get_cached_manifest_file_name(generator_cls, manifest_key) - if file_name is None: - raise CachedManifestNotFound(manifest_key) - else: - return self._make_manifest(generator_cls=generator_cls, - manifest_key=manifest_key, - file_name=file_name, - was_cached=True) - - def _make_manifest(self, - generator_cls: type['ManifestGenerator'], - manifest_key: ManifestKey, - file_name: str | None, - was_cached: bool - ) -> Manifest: - if not generator_cls.use_content_disposition_file_name: - file_name = None - object_key = generator_cls.s3_object_key(manifest_key) - return Manifest(object_key=object_key, - was_cached=was_cached, - format=generator_cls.format(), - manifest_key=manifest_key, - file_name=file_name) - - def get_manifest_url(self, manifest: Manifest) -> str: - return self.storage_service.get_presigned_url(key=manifest.object_key, - file_name=manifest.file_name) - - file_name_tag = 'azul_file_name' - - def _get_cached_manifest_file_name(self, - generator_cls: type['ManifestGenerator'], - manifest_key: ManifestKey - ) -> str | None: - """ - Return the proposed local file name of the manifest with the given - object key if it was previously created, still exists in the bucket, and - won't be expiring soon. Otherwise return None. - - :param generator_cls: The generator class of the manifest - - :param manifest_key: The key of the cached manifest - """ - object_key = generator_cls.s3_object_key(manifest_key) - try: - time_left = self.storage_service.time_until_object_expires(object_key, - expiration=config.manifest_expiration) - except StorageObjectNotFound: - log.info('Cached manifest not found: %s', manifest_key) - return None - else: - if time_left > config.manifest_expiration_margin: - tagging = self.storage_service.get_object_tagging(object_key) - try: - encoded_file_name = tagging[self.file_name_tag] - except KeyError: - # While unpaged manifest generators apply the tag *at* - # object creation, paged ones do so in a separate request. - # Reaching this point for a paged manifest (no name tag) - # means that the manifest has been created but not yet - # tagged. In this case, we treat the manifest as if it - # doesn't yet exist and return None. This assumes that the - # caller will then raise a `CachedManifestNotFound` - # exception causing a redirect response to the client and - # when the client follows the redirect, the tagging should - # be complete. 
- return None
- else:
- encoded_file_name = encoded_file_name.encode('ascii')
- return base64.urlsafe_b64decode(encoded_file_name).decode('utf-8')
- else:
- log.info('Cached manifest is about to expire: %s', object_key)
- return None
-
- def command_lines(self,
- manifest: Manifest | None,
- url: furl,
- authentication: Authentication | None
- ) -> FlatJSON:
- format = None if manifest is None else manifest.format
- generator_cls = ManifestGenerator.cls_for_format(format)
- file_name = None if manifest is None else manifest.file_name
- return generator_cls.command_lines(url, file_name, authentication)
-
-
-Cells = dict[str, str]
-
-
-class ManifestGenerator(metaclass=ABCMeta):
- """
- A generator for manifests. A manifest is an exhaustive representation of
- the documents in the aggregate index for a particular entity type. The
- generator queries that index for documents that match a given filter and
- transforms the result.
- """
-
- # Note to implementors: all property getters in this class and its
- # descendants must be inexpensive. If a property getter performs an
- # expensive computation or I/O, it should cache its return value.
-
- @classmethod
- @abstractmethod
- def format(cls) -> ManifestFormat:
- """
- Returns the manifest format implemented by this generator class.
- """
- raise NotImplementedError
-
- @cached_property
- def repository_plugin(self) -> RepositoryPlugin:
- catalog = self.catalog
- return RepositoryPlugin.load(catalog).create(catalog)
-
- @property
- def metadata_plugin(self) -> MetadataPlugin:
- return self.service.metadata_plugin(self.catalog)
-
- @classmethod
- @abstractmethod
- def file_name_extension(cls) -> str:
- """
- The file name extension to use when persisting the output of this
- generator to a file system or an object store.
- """
- raise NotImplementedError
-
- @property
- @abstractmethod
- def content_type(self) -> str:
- """
- The MIME type to use when describing the output of this generator.
- """
- raise NotImplementedError
-
- @classmethod
- def use_content_disposition_file_name(cls) -> bool:
- """
- True if the manifest output produced by the generator should use a custom
- file name when stored on a file system.
- """
- return True
-
- @property
- @abstractmethod
- def entity_type(self) -> str:
- """
- The type of the index entities this generator consumes. This controls
- which aggregate Elasticsearch index is queried to fetch the aggregate
- entity documents that this generator consumes when generating the
- output manifest.
- """
- raise NotImplementedError
-
- @cached_property
- def manifest_config(self) -> ManifestConfig:
- """
- The manifest config this generator uses. A manifest config is a mapping
- from document properties to manifest fields.
- """
- return self.metadata_plugin.manifest_config
-
- @cached_property
- def included_fields(self) -> list[FieldPath] | None:
- """
- A list of field paths to be included when requesting entity documents
- from the index, or None if all fields should be included.
- - https://www.elastic.co/guide/en/elasticsearch/reference/7.10/search-fields.html#source-filtering - """ - return [ - (*field_path, field_name) - for field_path, column_mapping in self.manifest_config.items() - for field_name in column_mapping.keys() - if field_name is not None - ] - - _cls_for_format: dict[ManifestFormat, type['ManifestGenerator']] = {} - - def __init_subclass__(cls) -> None: - super().__init_subclass__() - if not isabstract(cls): - format = cls.format() - assert format not in cls._cls_for_format - cls._cls_for_format[format] = cls - - @classmethod - def cls_for_format(cls, - format: ManifestFormat | None - ) -> type['ManifestGenerator']: - """ - Return the generator class for the given format. - - :param format: format specifying which type of generator to use - - :return: a concrete subclass of ManifestGenerator - """ - if format is None: - return cls - else: - return cls._cls_for_format[format] - - @classmethod - def _cmd_exe_quote(cls, s: str) -> str: - """ - Escape a string for insertion into a `cmd.exe` command line - """ - assert '\\' not in s, s - return dq(s) - - @classmethod - def command_lines(cls, - url: furl, - file_name: str | None, - authentication: Authentication | None - ) -> FlatJSON: - # Normally we would have used --remote-name and --remote-header-name - # which gets the file name from the content-disposition header. However, - # URLs longer than 255 characters trigger a bug in curl.exe's - # implementation of --remote-name on Windows. This is especially - # surprising because --remote-name doesn't need to parse the URL when - # --remote-header-name is also passed. To circumvent the URL parsing - # bug we provide the file name explicitly with --output. - - # Normally, curl writes the response body and returns 0 (success), - # even on server errors. With --fail, it writes an error message - # containing the HTTP status code and exits with 22 in those cases. - def options(quote_func): - return [] if file_name is None else [ - '--location', - '--fail', - '--output', - quote_func(file_name) - ] - - return { - 'cmd.exe': ' '.join([ - 'curl.exe', - *options(cls._cmd_exe_quote), - cls._cmd_exe_quote(str(url)) - ]), - 'bash': ' '.join([ - 'curl', - *options(shlex.quote), - shlex.quote(str(url)) - ]) - } - - def __init__(self, - service: ManifestService, - catalog: CatalogName, - filters: Filters - ) -> None: - """ - Construct a generator instance. - - :param catalog: the name of the catalog to use when querying the index - for the documents to be transformed into the manifest - - :param filters: the filter to use when querying the index for the - documents to be transformed into the manifest - - :param service: the service to use when querying the index - """ - super().__init__() - self.service = service - self.catalog = catalog - self.filters = filters - self.file_url_func = service.file_url_func - - manifest_namespace = UUID('ca1df635-b42c-4671-9322-b0a7209f0235') - - source_namespace = UUID('6540b139-ea49-4e36-8f19-17c309b5fa76') - - def manifest_key(self) -> ManifestKey: - """ - Return a manifest object key deterministically derived from this - generator's parameters (its concrete type and the arguments passed to - its constructor) and the current commit hash. The same parameters will - always produce the same return value in one revision of this code. - Different parameters should, with a very high probability, produce - different return values. 
- """ - git_commit = config.lambda_git_status['commit'] - filter_string = repr(sort_frozen(freeze(self.filters.explicit))) - content_hash = str(self.manifest_content_hash) - catalog = self.catalog - format = self.format() - manifest_hash_input = [ - git_commit, - catalog, - format.value, - content_hash, - filter_string - ] - joiner = ',' - assert not any(joiner in param for param in manifest_hash_input[:-1]) - manifest_hash = uuid5(self.manifest_namespace, joiner.join(manifest_hash_input)) - - source_ids = sorted(self.filters.source_ids) - assert not any(joiner in source_id for source_id in source_ids), source_ids - source_hash = uuid5(self.source_namespace, joiner.join(source_ids)) - - return ManifestKey(catalog=catalog, - format=format, - manifest_hash=manifest_hash, - source_hash=source_hash) - - @classmethod - def s3_object_key(cls, manifest_key: ManifestKey) -> str: - return 'manifests' + '/' + cls.s3_object_key_base(manifest_key) - - @classmethod - def s3_object_key_base(cls, manifest_key: ManifestKey) -> str: - manifest_hash = str(manifest_key.manifest_hash) - source_hash = str(manifest_key.source_hash) - for part in manifest_hash, source_hash: - for joiner in '.', '/': - assert joiner not in part, (joiner, part) - return '.'.join([manifest_hash, source_hash, cls.file_name_extension()]) - - def file_name(self, - manifest_key: ManifestKey, - base_name: str | None = None - ) -> str: - if base_name: - file_name_prefix = unicodedata.normalize('NFKD', base_name) - file_name_prefix = re.sub(r'[^\w ,.@%&\-_()\\[\]/{}]', '_', file_name_prefix).strip() - timestamp = datetime.now().strftime('%Y-%m-%d %H.%M') - file_name = f'{file_name_prefix} {timestamp}.{self.file_name_extension()}' - else: - atlas = config.catalogs[self.catalog].atlas - file_name = atlas + '-manifest-' + self.s3_object_key_base(manifest_key) - return file_name - - def _create_request(self) -> Search: - pipeline = self._create_pipeline() - request = self.service.create_request(self.catalog, self.entity_type) - request = pipeline.prepare_request(request) - # The response is processed by the generator, not the pipeline - return request - - def _create_pipeline(self) -> ElasticsearchChain: - if self.included_fields is None: - document_slice = DocumentSlice() - else: - document_slice = DocumentSlice(includes=list(map(dotted, self.included_fields))) - pipeline = self.service.create_chain(catalog=self.catalog, - entity_type=self.entity_type, - filters=self.filters, - post_filter=False, - document_slice=document_slice) - return pipeline - - def _hit_to_doc(self, hit: Hit) -> MutableJSON: - return self.service.translate_fields(self.catalog, - hit.to_dict(), - forward=False, - allowed_paths=self.included_fields) - - column_joiner = config.manifest_column_joiner - padded_joiner = ' ' + column_joiner + ' ' - - @cached_property - def _field_types(self) -> FieldTypes: - return self.service.field_types(self.catalog) - - def _extract_fields(self, - *, - field_path: FieldPath, - entities: JSONs, - column_mapping: ColumnMapping, - row: Cells) -> None: - """ - Extract columns in `column_mapping` from `entities` and insert values - into `row`. 
- """ - field_types = self._field_types - for field in field_path: - field_types = field_types[field] - - def convert(field_name, field_value): - try: - field_type = field_types[field_name] - except KeyError: - if field_name == 'file_url': - field_type = null_str - else: - raise - else: - if isinstance(field_type, list): - field_type = one(field_type) - return field_type.to_tsv(field_value) - - def validate(field_value: str) -> str: - assert self.column_joiner not in field_value - return field_value - - for field_name, column_name in column_mapping.items(): - if column_name is not None: - assert column_name not in row, f'Column mapping defines {column_name} twice' - column_value = [] - for entity in entities: - try: - field_value = entity[field_name] - except KeyError: - pass - else: - if isinstance(field_value, list): - column_value += [ - validate(convert(field_name, field_sub_value)) - for field_sub_value in field_value - if field_sub_value is not None - ] - else: - column_value.append(validate(convert(field_name, field_value))) - # FIXME: The slice is a hotfix. Reconsider. - # https://github.com/DataBiosphere/azul/issues/2649 - column_value = self.padded_joiner.join(sorted(set(column_value))[:100]) - row[column_name] = column_value - - def _get_entities(self, field_path: FieldPath, doc: JSON) -> JSONs: - """ - Given a document and a dotted path into that document, return the list - of entities designated by that path. - """ - assert field_path, field_path - d = doc - for key in field_path[:-1]: - d = d.get(key, {}) - entities = d.get(field_path[-1], []) - return entities - - def _azul_file_url(self, - file: JSON, - args: Mapping = frozendict() - ) -> str | None: - download_cls = self.repository_plugin.file_download_class() - if download_cls.needs_drs_uri and file['drs_uri'] is None: - return None - else: - return str(self.file_url_func(catalog=self.catalog, - file_uuid=file['uuid'], - version=file['version'], - fetch=False, - **args)) - - @cached_property - def manifest_content_hash(self) -> int: - log.debug('Computing content hash for manifest using filters %r ...', self.filters) - start_time = time.time() - request = self._create_request() - request.aggs.metric( - 'hash', - 'scripted_metric', - init_script=''' - state.fields = 0 - ''', - map_script=''' - for (bundle in params._source.bundles) { - state.fields += (bundle.uuid + bundle.version).hashCode() - } - ''', - combine_script=''' - return state.fields.hashCode() - ''', - reduce_script=''' - int result = 0; - for (state in states) { - result += state - } - return result - ''') - request = request.extra(size=0) - response = request.execute() - assert len(response.hits) == 0 - hash_value = response.aggregations.hash.value - log.info('Manifest content hash %i was computed in %.3fs using filters %r.', - hash_value, time.time() - start_time, self.filters) - return hash_value - - def tagging(self, file_name: str | None) -> Mapping[str, str] | None: - if file_name is None: - return None - else: - encoded_file_name = base64.urlsafe_b64encode(file_name.encode('utf-8')) - return {self.service.file_name_tag: encoded_file_name.decode('ascii')} - - @abstractmethod - def write(self, - manifest_key: ManifestKey, - partition: ManifestPartition, - ) -> ManifestPartition: - """ - Write the given partition of a manifest to object storage under the - specified key and return the next partition to be written. 
Unless the - returned partition is the last one, this method will soon be invoked - again, passing the partition returned by the previous invocation. - - A minimal implementation of this method would write the entire manifest - in just one large partition and return that partition with the is_last - flag set. - - :param manifest_key: The manifest key under which to store the manifest - partition. - - :param partition: The partition to write. - """ - raise NotImplementedError - - @property - def storage(self): - return self.service.storage_service - - -class ClientSidePagingManifestGenerator(ManifestGenerator, metaclass=ABCMeta): - """ - A mixin for manifest generators that use client-side paging to query - Elasticsearch. - """ - page_size = 500 - - def _create_paged_request(self, search_after: SortKey | None) -> Search: - pagination = Pagination(sort='entryId', - order='asc', - size=self.page_size, - search_after=search_after) - pipeline = self._create_pipeline() - # Only needs this to satisfy the type constraints - pipeline = ToDictStage(service=self.service, - catalog=self.catalog, - entity_type=self.entity_type).wrap(pipeline) - pipeline = PaginationStage(service=self.service, - catalog=self.catalog, - entity_type=self.entity_type, - pagination=pagination, - filters=self.filters, - peek_ahead=False).wrap(pipeline) - request = self.service.create_request(catalog=self.catalog, - entity_type=self.entity_type) - # The response is processed by the generator, not the pipeline - request = pipeline.prepare_request(request) - return request - - def _search_after(self, hit: Hit) -> SortKey: - a, b = hit.meta.sort - return a, b - - -class PagedManifestGenerator(ClientSidePagingManifestGenerator): - """ - A manifest generator whose output is split over several concatenable - segments, also known as pages. - - In some subclasses, e.g. CompactManifestGenerator and CurlManifestGenerator, - a manifest page corresponds to a page of hits from a paginated Elasticsearch - request. In others, e.g. JSONLVerbatimManifestGenerator, the relationship - between manifest pages and Elasticsearch pages is more complicated. - """ - - @abstractmethod - def write_page_to(self, - partition: ManifestPartition, - output: IO[str] - ) -> ManifestPartition: - """ - Write the generator output for the current page of the given partition - to the given stream and return an updated partition object that - represents the next page of the given partition. - - :param partition: the current partition - - :param output: the stream to write to - """ - raise NotImplementedError - - # With the minimum part size of 5 MiB I've observed a running time of only - # 5s per partition so to minimize step function churn we'll go with 50 MiB - # instead. - - part_size = 50 * 1024 * 1024 - - assert part_size >= AWS_S3_DEFAULT_MINIMUM_PART_SIZE - - def write(self, - manifest_key: ManifestKey, - partition: ManifestPartition, - ) -> ManifestPartition: - assert not partition.is_last, partition - if partition.config is None: - # The keys in manifest config are tuples which aren't allowed in - # JSON. We convert the outer mapping to a list of entries. 
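- # For example, a config entry {('contents', 'files'): {...}} becomes
- # [[['contents', 'files'], {...}]], and the else branch below restores
- # the tuple keys with tuple(k).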
- config = [[list(k), v] for k, v in self.manifest_config.items()] - partition = partition.with_config(config) - else: - config = {tuple(k): v for k, v in partition.config} - type(self).manifest_config.fset(self, config) - object_key = self.s3_object_key(manifest_key) - if partition.multipart_upload_id is None: - upload = self.storage.create_multipart_upload(object_key) - partition = partition.with_upload(upload.id) - else: - upload = self.storage.load_multipart_upload(object_key=object_key, - upload_id=partition.multipart_upload_id) - if partition.page_index is None: - partition = partition.first_page() - with BytesIO() as buffer: - with TextIOWrapper(buffer, encoding='utf-8', write_through=True) as text_buffer: - while True: - partition = self.write_page_to(partition, output=text_buffer) - # Manifest lambda has 2 GB of memory - assert buffer.tell() < 1.5 * 1024 ** 3 - if partition.is_last_page or buffer.tell() > self.part_size: - break - if buffer.tell() > 0: - buffer.seek(0) - part_etag = self.storage.upload_multipart_part(buffer, partition.index + 1, upload) - partition = partition.next(part_etag=part_etag) - if partition.is_last_page: - self.storage.complete_multipart_upload(upload, partition.part_etags) - file_name = self.file_name(manifest_key, base_name=partition.file_name) - tagging = self.tagging(file_name) - if tagging is not None: - self.storage.put_object_tagging(object_key, tagging) - partition = partition.last(file_name) - return partition - - -class FileBasedManifestGenerator(ManifestGenerator): - """ - A manifest generator that writes its output to a file. - - :return: the path to the file containing the output of the generator and an - optional string that should be used to name the output when - persisting it to an object store or another file system - """ - - @abstractmethod - def create_file(self) -> tuple[str, str | None]: - raise NotImplementedError - - def write(self, - manifest_key: ManifestKey, - partition: ManifestPartition, - ) -> ManifestPartition: - """ - Generate the manifest and return the desired content disposition file - name if necessary. - """ - assert partition.index == 0 and partition.page_index is None, partition - file_path, base_name = self.create_file() - file_name = self.file_name(manifest_key, base_name) - try: - self.storage.upload(file_path=file_path, - object_key=(self.s3_object_key(manifest_key)), - content_type=self.content_type, - tagging=self.tagging(file_name)) - finally: - os.remove(file_path) - partition = partition.last(file_name) - return partition - - -class CurlManifestGenerator(PagedManifestGenerator): - - @classmethod - def format(cls) -> ManifestFormat: - return ManifestFormat.curl - - @property - def content_type(self) -> str: - return 'text/plain' - - @classmethod - def file_name_extension(cls): - return 'curlrc' - - @property - def entity_type(self) -> str: - return 'files' - - @cached_property - def included_fields(self) -> list[FieldPath] | None: - return [ - *super().included_fields, - ('contents', 'files', 'related_files') - ] - - @classmethod - def command_lines(cls, - url: furl, - file_name: str | None, - authentication: Authentication | None - ) -> FlatJSON: - authentication_option = [] if authentication is None else [ - '--header', - cls._option(authentication.as_http_header()) - ] - manifest_options = [ - '--location', - '--fail', - ] - rate_limit = config.waf_rate_limit - # Some options are added to the command-line instead of the curl - # manifest so that the user can more easily customize them. 
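- # For example, assuming a hypothetical 10-minute WAF window
- # (rate_limit.period == 600) and rate_limit.retry_after == 10, the
- # expression below yields ceil(600 * 1.5 / 10) == 90 retries.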
- file_options = [ - # We want curl to make enough retries so that it waits a total of - # one and a half times the evaluation window of the WAF rate rule, - # long enough for the tripped rule to clear. - f'--retry {ceil(rate_limit.period * 1.5 / rate_limit.retry_after)}', - # Curl will respect the 'Retry-After' header if given in a response, - # like the one returned when the WAF rate rule is tripped. Otherwise, - # curl will wait for the number of seconds specified here. - '--retry-delay 10', - ] - return { - 'cmd.exe': ' '.join([ - 'curl.exe', - *manifest_options, - cls._cmd_exe_quote(str(url)), - '|', - 'curl.exe', - *authentication_option, - *file_options, - '--config', - '-' - ]), - 'bash': ' '.join([ - 'curl', - *manifest_options, - shlex.quote(str(url)), - '|', - 'curl', - *authentication_option, - *file_options, - '--config', - '-' - ]) - } - - @classmethod - def _option(cls, s: str) -> str: - """ - >>> f = CurlManifestGenerator._option - >>> f('') - '""' - - >>> f('abc') - '"abc"' - - >>> list(map(ord, f('"'))) - [34, 92, 34, 34] - - >>> list(map(ord, f(f('"')))) - [34, 92, 34, 92, 92, 92, 34, 92, 34, 34] - - """ - return '"' + s.replace('\\', '\\\\').replace('"', '\\"') + '"' - - def write_page_to(self, - partition: ManifestPartition, - output: IO[str] - ) -> ManifestPartition: - - def _write(file: JSON, is_related_file: bool = False): - name = file['name'] - # Related files are indexed differently than normal files (they - # don't have their own document but are listed inside the main - # file's document), so to ensure that the /repository/files - # endpoint can resolve them correctly, their endpoint URLs - # contain additional parameters, so that the endpoint does not - # need to query the index for that information. - args = { - 'requestIndex': 1, - 'fileName': name, - 'drsUri': file['drs_uri'] - } if is_related_file else { - } - - file_url = self._azul_file_url(file, args) - if file_url is None: - output.write(f"# File {file['uuid']!r}, version {file['version']!r} is " - f"currently not available in catalog {self.catalog!r}.\n\n") - else: - # To prevent overwriting one file with another one of the same name - # but different content we nest each file in a folder using the - # bundle UUID. Because a file can belong to multiple bundles we use - # the one with the most recent version. 
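- # max() compares the (version, uuid) key tuples lexicographically,
- # so the bundle with the latest version wins and the UUID only
- # breaks ties between bundles with identical versions.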
- bundle = max(cast(JSONs, doc['bundles']), key=itemgetter('version', 'uuid')) - output_name = self._sanitize_path(bundle['uuid'] + '/' + name) - output.write(f'url={self._option(file_url)}\n' - f'output={self._option(output_name)}\n\n') - - if partition.page_index == 0: - curl_options = [ - # FIXME: Remove `--http1.1` option - # https://github.com/DataBiosphere/azul/issues/7032 - '--http1.1', # Avoid a bug in curl 8.7.1 where 429s aren't retried with HTTP/2 - '--create-dirs', # Allow curl to create folders - '--compressed', # Request a compressed response - '--location', # Follow redirects - '--globoff', # Prevent '#' in file names from being interpreted as output variables - '--fail', # Upon server error don't save the error message to the file - '--fail-early', # Exit curl with error on the first failure encountered - '--continue-at -', # Resume partially downloaded files - '--write-out "Downloading to: %{filename_effective}\\n\\n"' - ] - output.write('\n\n'.join(curl_options)) - output.write('\n\n') - - request = self._create_paged_request(partition.search_after) - response = request.execute() - if response.hits: - hit = None - for hit in response.hits: - doc = self._hit_to_doc(hit) - file = one(cast(JSONs, doc['contents']['files'])) - _write(file) - for related_file in file['related_files']: - _write(related_file, is_related_file=True) - assert hit is not None - return partition.next_page(file_name=None, - search_after=self._search_after(hit)) - else: - return partition.last_page() - - # Disallow control characters and backslash as they likely indicate an - # injection attack. No useful file name should contain them - # - _malicious_chars = re.compile(r'[\x00-\x1f\\]') - - # Benign occurrences of potentially problematic characters - # - _problematic_chars = re.compile(r'[<>:"|?*]') - - # Disallow slashes anywhere in a path component. Allow a single dot at the - # beginning as long as it's followed by a something other than space or dot. - # Disallow space or dot at the end. Within the path component (anywhere but - # the beginning or end), dots and spaces are allowed, even consecutive ones - # - _valid_path_component = r'\.?[^./ ]([^/]*[^./ ])?' - - # Allow single slashes between path components - # - _valid_path = re.compile(rf'{_valid_path_component}(/{_valid_path_component})*') - - # Reject path components that are special on Windows, courtesy of DOS - # - special_dos_files = { - 'CON', 'PRN', 'AUX', 'NUL', - *(f'{cmd}{i}' for cmd in ['COM', 'LPT'] for i in range(1, 10)) - } - - @classmethod - def _sanitize_path(cls, path: str) -> str: - """ - >>> f = CurlManifestGenerator._sanitize_path - >>> f('foo/bar/\\x1F/file') # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R('Invalid file path', 'foo/bar/\\x1f/file', - 'Control character or backslash at position', 8) - - >>> f('foo/bar/COM6/file') # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R('Invalid file path', 'foo/bar/COM6/file', - 'Use of reserved path component for Windows', {'COM6'}) - - >>> f('foo/bar/ / baz/file') # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - AssertionError: R('Invalid file path', 'foo/bar/ / baz/file') - - Substitutions: - - >>> f('<>:"|?*<>:"|?*') - '______________' - - Pass-through: - - >>> f('foo/bar/file.fastq.gz') - 'foo/bar/file.fastq.gz' - - Invalid paths: - - >>> s: str # work around false `Unresolved reference` warning by PyCharm - - >>> all( - ... 
CurlManifestGenerator._valid_path.fullmatch(s) is None - ... for s in ('', '.', '..', ' ', ' x', 'x ', 'x ', '/', 'x/', '/x', 'x//x') - ... ) - True - - Valid paths: - - >>> all( - ... CurlManifestGenerator._valid_path.fullmatch(s) is not None - ... for s in ('x', '.x', '.x. y', 'x/x', '.x/.y') - ... ) - True - """ - match = cls._malicious_chars.search(path) - assert match is None, R('Invalid file path', path, - 'Control character or backslash at position', match.start()) - - path = cls._problematic_chars.sub('_', path) - - assert cls._valid_path.fullmatch(path) is not None, R('Invalid file path', path) - - components = set(path.split('/')) & cls.special_dos_files - assert not components, R('Invalid file path', path, - 'Use of reserved path component for Windows', components) - - return path - - -class CompactManifestGenerator(PagedManifestGenerator): - - @classmethod - def format(cls) -> ManifestFormat: - return ManifestFormat.compact - - @property - def content_type(self) -> str: - return 'text/tab-separated-values' - - @classmethod - def file_name_extension(cls): - return 'tsv' - - @property - def entity_type(self) -> str: - return 'files' - - @cached_property - def included_fields(self) -> list[FieldPath] | None: - return [ - *super().included_fields, - ('contents', 'files', 'related_files') - ] - - def write_page_to(self, - partition: ManifestPartition, - output: IO[str] - ) -> ManifestPartition: - column_mappings = self.manifest_config.values() - column_mappings = (d.values() for d in column_mappings) - column_names = list(filter(None, chain.from_iterable(column_mappings))) - writer = csv.DictWriter(output, column_names, dialect='excel-tab') - - if partition.page_index == 0: - writer.writeheader() - - request = self._create_paged_request(partition.search_after) - response = request.execute() - if response.hits: - project_short_names = set() - hit = None - for hit in response.hits: - doc = self._hit_to_doc(hit) - assert isinstance(doc, dict) - contents = doc['contents'] - if len(project_short_names) < 2 and 'projects' in contents: - project = one(cast(JSONs, contents['projects'])) - short_names = project['project_short_name'] - project_short_names.update(short_names) - row = {} - related_rows = [] - for field_path, column_mapping in self.manifest_config.items(): - entities = self._get_entities(field_path, doc) - if field_path == ('contents', 'files'): - file = copy_json(one(entities)) - file['file_url'] = self._azul_file_url(file) - entities = [file] - self._extract_fields(field_path=field_path, - entities=entities, - column_mapping=column_mapping, - row=row) - if field_path == ('contents', 'files'): - file = copy_json(one(entities)) - if 'related_files' in file: - field_path = (*field_path, 'related_files') - for related_file in file['related_files']: - related_row = {} - file.update(related_file) - file['file_url'] = self._azul_file_url(file) - self._extract_fields(field_path=field_path, - entities=[file], - column_mapping=column_mapping, - row=related_row) - related_rows.append(related_row) - writer.writerow(row) - for related in related_rows: - row.update(related) - writer.writerow(row) - assert hit is not None - file_name = project_short_names.pop() if len(project_short_names) == 1 else None - return partition.next_page(file_name=file_name, - search_after=self._search_after(hit)) - else: - return partition.last_page() - - -FQID = tuple[str, str] -Qualifier = str - -Group = Mapping[str, Cells] -Groups = list[Group] -Bundle = dict[Qualifier, Groups] -Bundles = dict[FQID, Bundle] - 
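How these aliases nest is easiest to see with a literal. A minimal,
illustrative sketch, assuming that `Cells` (defined earlier in this module)
holds the manifest cells for one inner entity; every key and value below is
made up::

    cells: Cells = ...                      # whatever shape Cells actually has
    group: Group = {'file': cells}          # keys are illustrative
    groups: Groups = [group]
    bundle: Bundle = {'file': groups}       # keyed by qualifier
    fqid: FQID = ('4da04038-adab-59a9-b6c4-3a61242cc972',
                  '2021-01-01T00:00:00Z')   # (uuid, version), made up
    bundles: Bundles = {fqid: bundle}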
- -class PFBManifestGenerator(FileBasedManifestGenerator): - - @classmethod - def format(cls) -> ManifestFormat: - return ManifestFormat.terra_pfb - - @classmethod - def file_name_extension(cls): - return 'avro' - - @property - def content_type(self) -> str: - return 'application/octet-stream' - - @property - def entity_type(self) -> str: - return 'files' - - @property - def included_fields(self) -> list[FieldPath] | None: - """ - We want all of the metadata because then we can use the field_types() - to generate the complete schema. - """ - return None - - def _all_docs_sorted(self) -> Iterable[JSON]: - request = self._create_request() - request = request.params(preserve_order=True).sort('entity_id.keyword') - for hit in request.scan(): - doc = self._hit_to_doc(hit) - yield doc - - def create_file(self) -> tuple[str, str | None]: - transformers = self.service.transformer_types(self.catalog) - transformer = one(t for t in transformers if t.entity_type() == 'files') - field_types = transformer.field_types() - pfb_schema = avro_pfb.pfb_schema_from_field_types(field_types) - - converter = avro_pfb.PFBConverter(pfb_schema, self.repository_plugin) - for doc in self._all_docs_sorted(): - converter.add_doc(doc) - - links = avro_pfb.pfb_links_from_field_types(field_types) - entity = avro_pfb.pfb_metadata_entity(links) - entities = itertools.chain([entity], converter.entities()) - - fd, path = mkstemp(suffix='.avro') - os.close(fd) - avro_pfb.write_pfb_entities(entities, pfb_schema, path) - return path, None - - -class VerbatimManifestGenerator(ClientSidePagingManifestGenerator, - metaclass=ABCMeta): - - @property - def entity_type(self) -> str: - # Orphans only have projects/datasets as hubs, so we need to retrieve - # aggregates of those types in order to join against orphan replicas - root_entity_type = self.metadata_plugin.root_entity_type - return root_entity_type if self.include_orphans else 'files' - - @property - def included_fields(self) -> list[FieldPath]: - # This is only used when searching the aggregates, which are only used - # to perform a "join" on the replicas index. Therefore, we only need the - # "keys" used for the join. - return [ - ('entity_id',), - *( - ('contents', entity_type, 'document_id') - for entity_type in self.hot_entity_types - ) - ] - - @property - def hot_entity_types(self) -> Iterable[str]: - return self.metadata_plugin.hot_entity_types - - @property - def include_orphans(self) -> bool: - - # When filtering exclusively by properties of implicit hubs, e.g., - # data sets for AnVIL or projects for HCA, we include replicas of all - # entities implicitly connected to the matching hubs, even replicas of - # orphans, i.e., entities that aren't connected to files. - # - plugin = self.metadata_plugin - root_entity_fields = { - field_name - for field_name, field_path in plugin.field_mapping.items() - if field_path[0] == 'contents' and field_path[1] == plugin.root_entity_type - } - - # For both HCA and AnVIL, these root entities are bijective with the - # sources used for indexing, and filtering by a specific project - # or dataset entity should produce the same results as filtering by - # that entity's source. - # - # The verbatim JSONL generator temporarily inserts a source ID condition - # into its provided filters in order to partition the manifest. If the - # source ID field were not included below, that insertion would cause - # orphans to be absent from the manifest, which is incorrect. 
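-        # For example (field names illustrative): with AnVIL's 'datasets' as
-        # the root entity type, a filter on just 'datasets.title' keeps the
-        # subset check below true, so orphans are included, while adding a
-        # filter on 'files.file_format' makes it false, so orphans are
-        # excluded.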
-        #
-        source_fields = {
-            plugin.special_fields.source_id,
-            plugin.special_fields.source_spec
-        }
-        return self.filters.explicit.keys() < (root_entity_fields | source_fields)
-
-    @attrs.frozen(kw_only=True)
-    class ReplicaKeys:
-        """
-        Most replicas contain a list of the entity IDs of their hubs, usually
-        file entities. However, some low-cardinality entities like HCA projects
-        have too many hubs to track within their replica document.
-
-        This class captures the information needed to locate all replicas
-        associated with a given hub entity, either using the hub's entity ID
-        or the replica's entity ID.
-        """
-        hub_id: str
-        replica_ids: list[str]
-
-    def _paginate_hits(self,
-                       request_factory: Callable[[SortKey | None], Search]
-                       ) -> Iterable[Hit]:
-        """
-        Yield all hits in every page of Elasticsearch hits in responses to
-        requests that use client-side paging.
-
-        :param request_factory: A callable that returns a prepared Elasticsearch
-                                request for the given search-after key, with the
-                                appropriate filters and sorting applied. The
-                                returned request should yield one page worth of
-                                hits, starting at the first page (if the argument
-                                is None), or at the hit right after the hit with
-                                the given search-after key
-        """
-        search_after = None
-        while True:
-            request = request_factory(search_after)
-            response = request.execute()
-            if response.hits:
-                hit = None
-                for hit in response.hits:
-                    yield hit
-                assert hit is not None
-                search_after = self._search_after(hit)
-            else:
-                break
-
-    def _list_replica_keys(self) -> Iterable[ReplicaKeys]:
-        for hit in self._paginate_hits(self._create_paged_request):
-            document_ids = [
-                document_id
-                for entity_type in self.hot_entity_types
-                for inner_entity in getitem(hit['contents'], entity_type, ())
-                # `document_id` is a scalar (string) when the inner and outer
-                # entity types match, and an array otherwise. `None` should not
-                # occur.
-                for document_id in always_iterable(inner_entity['document_id'])
-            ]
-            yield self.ReplicaKeys(hub_id=hit['entity_id'],
-                                   replica_ids=document_ids)
-
-    def _list_replicas(self) -> Iterable[JSON]:
-        emitted_replica_ids = set()
-        for page in chunked(self._list_replica_keys(), self.page_size):
-            num_replicas = 0
-            num_new_replicas = 0
-            for replica in self._join_replicas(page):
-                num_replicas += 1
-                # A single replica may have many hubs. To prevent replicas from
-                # being emitted more than once, we need to keep track of
-                # replicas already emitted.
-                replica_id = replica.meta.id
-                if replica_id not in emitted_replica_ids:
-                    num_new_replicas += 1
-                    yield replica.to_dict()
-                    emitted_replica_ids.add(replica_id)
-            log.info('Found %d replicas (%d already emitted) from page of %d hubs',
-                     num_replicas, num_replicas - num_new_replicas, len(page))
-
-    def _join_replicas(self, keys: Iterable[ReplicaKeys]) -> Iterable[Hit]:
-        hub_ids, replica_ids = set(), set()
-        for key in keys:
-            hub_ids.add(key.hub_id)
-            replica_ids.update(key.replica_ids)
-
-        request = self.service.create_request(catalog=self.catalog,
-                                              entity_type='replica',
-                                              doc_type=DocumentType.replica)
-        request = request.query(Q('bool', should=[
-            {'terms': {'hub_ids.keyword': list(hub_ids)}},
-            {'terms': {'entity_id.keyword': list(replica_ids)}}
-        ]))
-        request = request.extra(size=self.page_size)
-
-        # `_id` is currently the only index field that is unique to each replica
-        # document (and thus results in an unambiguous total ordering). However,
-        # sorting just by `_id` is unacceptably slow, an Elasticsearch quirk.
To - # overcome the performance hit, we sort by a field that's *almost* - # unique to each replica, so that `_id` only needs to be loaded and - # compared in the infrequent event that it's needed as a tiebreaker. - # - # FIXME: ES DeprecationWarning for using _id as sort key - # https://github.com/DataBiosphere/azul/issues/7290 - # - request = request.sort('entity_id.keyword', '_id') - - def request_factory(search_after: SortKey | None) -> Search: - if search_after is None: - return request - else: - return request.extra(search_after=search_after) - - return self._paginate_hits(request_factory) - - -class JSONLVerbatimManifestGenerator(PagedManifestGenerator, - VerbatimManifestGenerator): - - @property - def content_type(self) -> str: - return 'application/jsonl' - - @classmethod - def file_name_extension(cls) -> str: - return 'jsonl' - - @classmethod - def format(cls) -> ManifestFormat: - return ManifestFormat.verbatim_jsonl - - @property - def source_id_field(self) -> str: - return self.metadata_plugin.special_fields.source_id - - def source_ids(self) -> list[str]: - # Currently, we process each source that might be included in the - # manifest. This can be very inefficient since many partitions may be - # empty for small manifests. A potential optimization is to use a terms - # aggregation to query for the set of nonempty sources before - # processing any hits. - - # It's possible that inaccessible sources are included in the explicit - # sources. If they are, an exception will be raised when the filters are - # reified, so it's safe to skip that check here. - try: - source_filter = self.filters.explicit[self.source_id_field] - except KeyError: - sources = self.filters.source_ids - else: - sources = source_filter['is'] - return sorted(sources) - - def write_page_to(self, - partition: ManifestPartition, - output: IO[str] - ) -> ManifestPartition: - # All replicas from each source must be held in memory simultaneously to - # avoid emitting duplicates. Therefore, each "page" of this manifest - # must retrieve every replica from a given source, using multiple paged - # requests to ElasticSearch if necessary. - source_ids = self.source_ids() - source_id = source_ids[partition.page_index] - log.info('Listing replicas from source %r for manifest page %d', - source_id, partition.page_index) - partition_filter = {self.source_id_field: {'is': [source_id]}} - original_filters = self.filters - try: - self.filters = original_filters.update(partition_filter) - replicas = self._list_replicas() - for replica in replicas: - entry = { - 'value': replica['contents'], - 'type': replica['replica_type'] - } - json.dump(entry, output) - output.write('\n') - finally: - self.filters = original_filters - last_page = len(source_ids) - 1 - if partition.page_index < last_page: - return partition.next_page(file_name=None, search_after=None) - elif partition.page_index == last_page: - return partition.last_page() - else: - assert False, (partition, source_ids) - - -class PFBVerbatimManifestGenerator(FileBasedManifestGenerator, - VerbatimManifestGenerator): - - @property - def content_type(self) -> str: - return 'application/octet-stream' - - @classmethod - def file_name_extension(cls): - return 'avro' - - @classmethod - def format(cls) -> ManifestFormat: - return ManifestFormat.verbatim_pfb - - def _include_relations(self, replica: JSON) -> bool: - # Terra will reject the handover if the manifest includes - # dangling relations, i.e., if any entity references another - # entity that isn't included in the manifest. 
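-        # (For example, an activity entity that still refers to a derived
-        # file which a filter excluded from the manifest would be such a
-        # dangling reference; see case 2 below.)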
There are three - # known cases where dangling relations can occur (note that - # currently only the AnVIL plugins support adding relations - # to the manifest): - # - # 1. If an entity occurs in both a replica bundle and a primary - # bundle, but only the replica bundle is indexed, its - # referenced entities may be missing from the index (and - # consequently from the manifest). This can only occur when - # the deployment is configured to index snapshots using a - # common prefix. See - # https://github.com/DataBiosphere/azul/issues/6843 - # - # 2. When using a filter that matches some but not all of the - # files derived from a particular activity, the activity will - # be left with dangling relations to the derived files that - # didn't match the filter. - # - # 3. The `anvil_assayactivity` table includes a foreign key into - # the `anvil_antibody` table. We only index replicas from the - # latter as orphans, so replicas from the former can include - # dangling relations when orphans are not included. - # See https://github.com/DataBiosphere/azul/issues/4440 - # - # (1) can only occur when orphans are included, and (2) and (3) - # can only occur when orphans are *not* included. - # - prefix = SourceSpec.parse_prefix_only(replica['source']['spec']) - return ( - config.enable_verbatim_relations - and self.include_orphans - and not prefix.common - ) - - def create_file(self) -> tuple[str, str | None]: - replicas = list(self._list_replicas()) - plugin = self.metadata_plugin - replica_schemas = plugin.verbatim_pfb_schema(replicas) - # Ensure field order is consistent for unit tests - replica_schemas.sort(key=itemgetter('name')) - links = { - replica_type: plugin.verbatim_pfb_links(replica_type) - for replica_type in ([s['name'] for s in replica_schemas]) - } - pfb_metadata_entity = avro_pfb.pfb_metadata_entity(links) - pfb_schema = avro_pfb.avro_pfb_schema(replica_schemas) - - def pfb_entities(): - yield pfb_metadata_entity - for replica in replicas: - id = plugin.verbatim_pfb_entity_id(replica) - entity = avro_pfb.PFBEntity.for_replica(id, dict(replica)) - # The inclusion of relations is determined on a case-by-case - # basis for each replica, which may result in inconsistent - # expression of relations across rows in the same manifest. - # We chose this approach because scanning all replicas in - # advance would present another obstacle to our goal of - # parallelizing the manifest generation. 
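-                # Judging from the loop below, verbatim_pfb_relations
-                # presumably yields (replica_type, entity_id) pairs, one per
-                # relation attached to this replica.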
- if self._include_relations(replica): - relations = plugin.verbatim_pfb_relations(replica) - entity_relations = [ - PFBRelation(dst_name=replica_type, dst_id=entity_id) - for replica_type, entity_id in relations - ] - else: - entity_relations = [] - yield entity.to_json(entity_relations) - - fd, path = mkstemp(suffix=f'.{self.file_name_extension()}') - os.close(fd) - avro_pfb.write_pfb_entities(pfb_entities(), pfb_schema, path) - return path, None diff --git a/src/azul/service/repository_controller.py b/src/azul/service/repository_controller.py deleted file mode 100644 index 5bf7d88e06..0000000000 --- a/src/azul/service/repository_controller.py +++ /dev/null @@ -1,441 +0,0 @@ -from collections.abc import ( - Mapping, - Sequence, -) -import json -import logging -import time -from typing import ( - Any, - Callable, - cast, -) - -import attr -import attrs -from chalice import ( - BadRequestError, - NotFoundError, - TooManyRequestsError, -) - -from azul import ( - CatalogName, - R, - cache, - cached_property, - config, -) -from azul.auth import ( - Authentication, -) -from azul.chalice import ( - ServiceUnavailableError, -) -from azul.collections import ( - adict, -) -from azul.http import ( - LimitedTimeoutException, - TooManyRequestsException, -) -from azul.indexer.field import ( - FieldType, - pass_thru_bool, -) -from azul.indexer.mirror_service import ( - BaseMirrorService, - MirrorFileDownload, -) -from azul.plugins import ( - File, - RepositoryPlugin, -) -from azul.service import ( - BadArgumentException, -) -from azul.service.app_controller import ( - Mandatory, - validate_catalog, - validate_params, -) -from azul.service.elasticsearch_service import ( - IndexNotFoundError, - Pagination, -) -from azul.service.repository_service import ( - EntityNotFoundError, - RepositoryService, -) -from azul.service.source_controller import ( - SourceController, -) -from azul.types import ( - JSON, - is_optional, -) -from azul.uuids import ( - InvalidUUIDError, -) - -log = logging.getLogger(__name__) - - -class RepositoryController(SourceController): - - @cached_property - def service(self) -> RepositoryService: - return RepositoryService() - - @cache - def mirror_service(self, catalog: CatalogName) -> BaseMirrorService: - return BaseMirrorService(catalog=catalog) - - @cache - def repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin: - return RepositoryPlugin.load(catalog).create(catalog) - - def search(self, - *, - catalog: CatalogName, - entity_type: str, - item_id: str | None, - filters: str | None, - pagination: Pagination, - authentication: Authentication - ) -> JSON: - filters = self.get_filters(catalog, authentication, filters) - try: - response = self.service.search(catalog=catalog, - entity_type=entity_type, - file_url_func=self.file_url_func, - item_id=item_id, - filters=filters, - pagination=pagination) - except (BadArgumentException, InvalidUUIDError) as e: - raise BadRequestError(e) - except (EntityNotFoundError, IndexNotFoundError) as e: - raise NotFoundError(e) - return cast(JSON, response) - - def summary(self, - *, - catalog: CatalogName, - filters: str, - authentication: Authentication - ) -> JSON: - filters = self.get_filters(catalog, authentication, filters) - try: - response = self.service.summary(catalog, filters) - except BadArgumentException as e: - raise BadRequestError(e) - return cast(JSON, response) - - def _parse_range_request_header(self, - range_specifier: str - ) -> Sequence[tuple[int | None, int | None]]: - """ - >>> # noinspection PyTypeChecker - >>> rc 
= RepositoryController(app=None, file_url_func=None) - >>> rc._parse_range_request_header('bytes=100-200,300-400') - [(100, 200), (300, 400)] - - >>> rc._parse_range_request_header('bytes=-100') - [(None, 100)] - - >>> rc._parse_range_request_header('bytes=100-') - [(100, None)] - - >>> rc._parse_range_request_header('foo=100') - [] - - >>> rc._parse_range_request_header('') - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Invalid range specifier '' - - >>> rc._parse_range_request_header('100-200') - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Invalid range specifier '100-200' - - >>> rc._parse_range_request_header('bytes=') - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Invalid range specifier 'bytes=' - - >>> rc._parse_range_request_header('bytes=100') - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Invalid range specifier 'bytes=100' - - >>> rc._parse_range_request_header('bytes=-') - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Invalid range specifier 'bytes=-' - - >>> rc._parse_range_request_header('bytes=--') - Traceback (most recent call last): - ... - chalice.app.BadRequestError: Invalid range specifier 'bytes=--' - """ - - def to_int_or_none(value: str) -> int | None: - return None if value == '' else int(value) - - parsed_ranges = [] - try: - unit, ranges = range_specifier.split('=') - if unit == 'bytes': - for range_spec in ranges.split(','): - start, end = range_spec.split('-') - assert start != '' or end != '', R('Empty range') - parsed_ranges.append((to_int_or_none(start), to_int_or_none(end))) - else: - assert unit != '', R('Empty range unit') - except Exception as e: - raise BadRequestError(f'Invalid range specifier {range_specifier!r}') from e - return parsed_ranges - - def download_file(self, - catalog: CatalogName, - fetch: bool, - file_uuid: str, - query_params: Mapping[str, str], - headers: Mapping[str, str], - authentication: Authentication | None - ): - - # Check the catalog in a separate step so that the plugins can be loaded - # safely, since doing so requires a valid catalog. We need the metadata - # plugin to know which file parameters to expect, and the repository - # plugin to validate the file version. 
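-        # For instance, a hypothetical request for
-        # .../files/{uuid}?catalog=dcp2&requestIndex=1&fileName=a.bam would
-        # pass the first validation below on the strength of its catalog
-        # alone; only then are that catalog's plugins loaded to validate the
-        # remaining parameters in the second call.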
-        validate_params(query_params,
-                        catalog=validate_catalog,
-                        requestIndex=int,
-                        allow_extra_params=True)
-
-        request_index = int(query_params.get('requestIndex', '0'))
-
-        validate_params(query_params,
-                        catalog=str,
-                        requestIndex=int,
-                        wait=self._validate_wait,
-                        replica=self._validate_replica,
-                        token=str,
-                        **self._file_param_validators(catalog, request_index))
-
-        file_version = query_params.get('version')
-        replica = query_params.get('replica')
-        file_name = query_params.get('fileName')
-        drs_uri = query_params.get('drsUri')
-        wait = query_params.get('wait')
-        token = query_params.get('token')
-
-        if request_index == 0:
-            file = self.service.get_data_file(catalog=catalog,
-                                              file_uuid=file_uuid,
-                                              file_version=file_version,
-                                              filters=self.get_filters(catalog, authentication, None))
-            if file is None:
-                raise NotFoundError(f'Unable to find file {file_uuid!r}, '
-                                    f'version {file_version!r} in catalog {catalog!r}')
-            file = attr.evolve(file, **adict(name=file_name, drs_uri=drs_uri))
-        else:
-            file = self._file_from_request(catalog, file_uuid, query_params)
-
-        try:
-            range_specifier = headers['range']
-        except KeyError:
-            pass
-        else:
-            requested_range = self._parse_range_request_header(range_specifier)
-            if requested_range == [(file.size, None)]:
-                # Curl versions below 8.5.0 fail when they receive a 416
-                # response to an attempt to resume a previously completed file
-                # download (https://github.com/curl/curl/issues/10521). Instead
-                # of a 416, we therefore return a 206 along with a
-                # `Content-Range` header, which has been confirmed to work for
-                # all curl versions tested (7.71.1 through 8.12.1).
-                return {
-                    'Status': 206,
-                    'Content-Length': 0,
-                    'Content-Range': f'bytes */{file.size}'
-                }
-
-        plugin = self.repository_plugin(catalog)
-
-        if config.enable_mirroring:
-            mirror_service = self.mirror_service(catalog)
-            is_mirrored = mirror_service.info_exists(file)
-        else:
-            mirror_service, is_mirrored = None, False
-        if is_mirrored:
-            # The file's content type would be None on subsequent requests since
-            # it isn't propagated via a query parameter. `MirrorFileDownload`
-            # will always be ready immediately.
- assert request_index == 0, request_index - download = MirrorFileDownload( - file=file, - location=mirror_service.get_mirror_url(file), - replica=replica, - token=token - ) - assert download.retry_after is None, download - else: - download_cls = plugin.file_download_class() - download = download_cls(file=file, replica=replica, token=token) - - try: - download.update(plugin, authentication) - except LimitedTimeoutException as e: - raise ServiceUnavailableError(*e.args) - except TooManyRequestsException as e: - raise TooManyRequestsError(*e.args) - if download.retry_after is not None: - retry_after = min(download.retry_after, int(1.3 ** request_index)) - if wait is not None: - if wait == '0': - pass - elif wait == '1': - # Sleep in the lambda but ensure that we wake up before it - # runs out of execution time (and before API Gateway times - # out) so we get a chance to return a response to the client - remaining_time = self.lambda_context.get_remaining_time_in_millis() / 1000 - server_side_sleep = min(float(retry_after), - remaining_time - config.api_gateway_timeout_padding - 3) - time.sleep(server_side_sleep) - retry_after = round(retry_after - server_side_sleep) - else: - assert False, wait - query_params = self._file_to_request(download.file) | adict( - token=download.token, - replica=download.replica, - requestIndex=request_index + 1, - wait=wait - ) - return { - 'Status': 301, - **({'Retry-After': retry_after} if retry_after else {}), - 'Location': str(self.file_url_func(catalog=catalog, - file_uuid=file_uuid, - fetch=fetch, - **query_params)) - } - elif download.location is not None: - log_data = { - **file.to_json(), - 'catalog': catalog, - 'fetch': fetch, - **{ - k: headers.get(k) - for k in ('range', 'host', 'user-agent', 'x-forwarded-for') - } - } - log.info('Download of %s file %s', - 'mirrored' if is_mirrored else 'repository', - json.dumps(log_data)) - return { - 'Status': 302, - 'Location': download.location - } - else: - assert download.file.drs_uri is None, download - raise NotFoundError(f'File {file_uuid!r} with version {file_version!r} ' - f'was found in catalog {catalog!r}, however no download is currently available') - - @cache - def field_types(self, catalog: CatalogName) -> Mapping[str, FieldType]: - """ - Returns the field type for each supported sort and filter field, using - the name of the field as provided by clients. - """ - result = {} - plugin = self.service.metadata_plugin(catalog) - for field, path in plugin.field_mapping.items(): - field_type = self.service.field_type(catalog, path) - if isinstance(field_type, FieldType): - result[field] = field_type - # This field is a synthetic element of the response and will never be - # null. Including it here helps to streamline request validation. 
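-        # For example, a client filter like {'accessible': {'is': [true]}}
-        # (the exact field name comes from the plugin's special_fields) can
-        # then be validated against pass_thru_bool like any indexed field.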
- accessible = plugin.special_fields.accessible - assert accessible not in result, result - result[accessible] = pass_thru_bool - return result - - def _validate_wait(self, wait: str | None): - if wait not in ('0', '1', None): - raise ValueError - - def _validate_replica(self, replica: str): - if replica not in ('aws', 'gcp'): - raise ValueError - - def _file_param_validators(self, - catalog: CatalogName, - request_index: int - ) -> dict[str, Callable[[Any], Any]]: - all_file_validators = dict( - version=self.repository_plugin(catalog).validate_version, - fileName=str, - drsUri=str, - sha256=str, - md5=str - ) - result = {} - for a in attrs.fields(self._file_class(catalog)): - try: - param_name = self._file_params_by_field[a.name] - except KeyError: - assert a.name == 'uuid' or is_optional(a.type), a - else: - validator = all_file_validators[param_name] - if request_index > 0 and not is_optional(a.type): - validator = Mandatory(validator) - result[param_name] = validator - return result - - def _file_from_request(self, - catalog: CatalogName, - uuid: str, - params: Mapping[str, str] - ) -> File: - file_class = self._file_class(catalog) - fields = {} - for a in attrs.fields(file_class): - if a.name == 'uuid': - value = uuid - else: - try: - # A KeyError here means we do not support passing the field as a query parameter - param_name = self._file_params_by_field[a.name] - # A KeyError here means we do support it, but no parameter was provided - value = params[param_name] - except KeyError: - assert is_optional(a.type), a - value = None - fields[a.name] = value - return file_class.from_json(fields) - - def _file_to_request(self, file: File) -> dict[str, str]: - params = {} - for a in attrs.fields(type(file)): - if a.name != 'uuid': - value = getattr(file, a.name) - param_name = self._file_params_by_field.get(a.name) - if param_name is None or not isinstance(value, str): - assert is_optional(a.type), (a.name, file) - else: - params[param_name] = value - return params - - _file_params_by_field = { - 'version': 'version', - 'name': 'fileName', - 'drs_uri': 'drsUri', - 'sha256': 'sha256', - 'md5': 'md5' - } - - def _file_class(self, catalog: CatalogName) -> type[File]: - return self.service.metadata_plugin(catalog).file_class diff --git a/src/azul/service/repository_service.py b/src/azul/service/repository_service.py deleted file mode 100644 index eb20e0a2da..0000000000 --- a/src/azul/service/repository_service.py +++ /dev/null @@ -1,381 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections.abc import ( - Mapping, - Sequence, -) -from concurrent.futures import ( - ThreadPoolExecutor, -) -import json -import logging -from typing import ( - TYPE_CHECKING, -) - -from more_itertools import ( - first, - one, -) -import opensearchpy -from opensearchpy import ( - Search, -) -from opensearchpy.helpers.response import ( - Hit, -) - -from azul import ( - CatalogName, - cache, - config, -) -from azul.plugins import ( - File, - RepositoryPlugin, - dotted, -) -from azul.service import ( - BadArgumentException, - FileUrlFunc, - Filters, -) -from azul.service.elasticsearch_service import ( - ElasticsearchService, - ElasticsearchStage, - IndexNotFoundError, - Pagination, - PaginationStage, - ResponseTriple, - ToDictStage, - _ElasticsearchStage, -) -from azul.types import ( - AnyMutableJSON, - JSON, - MutableJSON, -) -from azul.uuids import ( - validate_uuid, -) - -log = logging.getLogger(__name__) - - -class EntityNotFoundError(Exception): - - def __init__(self, entity_type: str, 
                 entity_id: str):
-        super().__init__(f"Can't find an entity in {entity_type} with UUID {entity_id}.")
-
-
-class SearchResponseStage(_ElasticsearchStage[ResponseTriple, MutableJSON],
-                          metaclass=ABCMeta):
-
-    def prepare_request(self, request: Search) -> Search:
-        return request
-
-
-class SummaryResponseStage(ElasticsearchStage[JSON, MutableJSON],
-                           metaclass=ABCMeta):
-
-    @property
-    @abstractmethod
-    def aggs_by_authority(self) -> Mapping[str, Sequence[str]]:
-        raise NotImplementedError
-
-    def prepare_request(self, request: Search) -> Search:
-        return request
-
-
-class RepositoryService(ElasticsearchService):
-
-    @cache
-    def repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin:
-        return RepositoryPlugin.load(catalog).create(catalog)
-
-    def search(self,
-               *,
-               catalog: CatalogName,
-               entity_type: str,
-               file_url_func: FileUrlFunc,
-               item_id: str | None,
-               filters: Filters,
-               pagination: Pagination
-               ) -> MutableJSON:
-        """
-        Return data for a particular entity type, or for a single item if
-        `item_id` is given.
-
-        :param catalog: The name of the catalog to query
-        :param entity_type: Which index to search (e.g. 'projects', 'specimens', etc.)
-        :param pagination: A dictionary with pagination information as returned from `_get_pagination()`
-        :param filters: parsed JSON filters from the request
-        :param item_id: If item_id is specified, only a single item is searched for
-        :param file_url_func: A function that is used only when getting a *list* of files data.
-        It creates the files URL based on info from the request. It should have the type
-        signature `(uuid: str, **params) -> str`
-        :return: The Elasticsearch JSON response
-        """
-        if item_id is not None:
-            validate_uuid(item_id)
-            filters = filters.update({'entryId': {'is': [item_id]}})
-
-        response = self._search(catalog=catalog,
-                                filters=filters,
-                                pagination=pagination,
-                                aggregate=item_id is None,
-                                entity_type=entity_type)
-
-        special_fields = self.metadata_plugin(catalog).special_fields
-        for hit in response['hits']:
-            entity = one(hit[entity_type])
-            source_id = one(hit['sources'])[special_fields.source_id]
-            entity[special_fields.accessible] = source_id in filters.source_ids
-
-        def inject_file_urls(node: AnyMutableJSON, *path: str) -> None:
-            if node is None:
-                pass
-            elif isinstance(node, (str, int, float, bool)):
-                pass
-            elif isinstance(node, list):
-                for child in node:
-                    inject_file_urls(child, *path)
-            elif isinstance(node, dict):
-                if path:
-                    try:
-                        next_node = node[path[0]]
-                    except KeyError:
-                        # Not all node trees will match the given path (e.g. a
-                        # response from the 'files' index won't have 'matrices'
-                        # in its 'hits[].projects' inner entities).
- pass - else: - inject_file_urls(next_node, *path[1:]) - else: - try: - version = node['version'] - uuid = node['uuid'] - drs_uri = node['drs_uri'] - except KeyError: - for child in node.values(): - inject_file_urls(child, *path) - else: - plugin = self.repository_plugin(catalog) - if drs_uri is None and plugin.file_download_class().needs_drs_uri: - node['url'] = None - else: - node['url'] = str(file_url_func(catalog=catalog, - fetch=False, - file_uuid=uuid, - version=version)) - else: - assert False - - inject_file_urls(response['hits'], 'projects', 'contributedAnalyses') - inject_file_urls(response['hits'], 'projects', 'matrices') - inject_file_urls(response['hits'], 'files') - - if item_id is not None: - response = one(response['hits'], too_short=EntityNotFoundError(entity_type, item_id)) - return response - - def _search(self, - *, - catalog: CatalogName, - entity_type: str, - aggregate: bool, - filters: Filters, - pagination: Pagination - ) -> MutableJSON: - """ - This function does the whole transformation process. It takes the path - of the config file, the filters, and pagination, if any. Excluding - filters will do a match_all request. Excluding pagination will exclude - pagination from the output. - - :param catalog: The name of the catalog to query - - :param entity_type: the string referring to the entity type used to get - the ElasticSearch index to search - - :param aggregate: Whether to perform the aggregation stage or not. - - :param filters: Filter parameter from the API to be used in the query. - - :param pagination: Pagination to be used for the API - - :return: Returns the transformed request - """ - plugin = self.metadata_plugin(catalog) - field_mapping = plugin.field_mapping - - for facet in filters.explicit.keys(): - if facet != plugin.special_fields.accessible and facet not in field_mapping: - raise BadArgumentException(f'Unable to filter by undefined facet {facet}.') - - facet = pagination.sort - if facet not in field_mapping: - raise BadArgumentException(f'Unable to sort by undefined facet {facet}.') - - chain = self.create_chain(catalog=catalog, - entity_type=entity_type, - filters=filters, - post_filter=True, - document_slice=None) - - chain = ToDictStage(service=self, - catalog=catalog, - entity_type=entity_type).wrap(chain) - - if aggregate: - chain = plugin.aggregation_stage.create_and_wrap(chain) - - chain = PaginationStage(service=self, - catalog=catalog, - entity_type=entity_type, - pagination=pagination, - peek_ahead=True, - filters=filters).wrap(chain) - - response_stage_cls = plugin.search_response_stage - if TYPE_CHECKING: # work around https://youtrack.jetbrains.com/issue/PY-44728 - response_stage_cls = SearchResponseStage - chain = response_stage_cls(service=self, - catalog=catalog, - entity_type=entity_type).wrap(chain) - - request = self.create_request(catalog, entity_type) - request = chain.prepare_request(request) - try: - response = request.execute(ignore_cache=True) - except opensearchpy.NotFoundError as e: - raise IndexNotFoundError(e.info['error']['index']) - response = chain.process_response(response) - return response - - def summary(self, - catalog: CatalogName, - filters: Filters - ) -> MutableJSON: - # FIXME: Due to the fact that we run multiple requests in parallel each - # in a separate chain, and the resulting need to multiplex the - # responses, the response stage is not part of any chain. 
- # https://github.com/DataBiosphere/azul/issues/4128 - plugin = self.metadata_plugin(catalog) - response_stage = plugin.summary_response_stage() - - aggs_by_authority = response_stage.aggs_by_authority - - def summary(entity_type): - return entity_type, self._summary(catalog=catalog, - entity_type=entity_type, - filters=filters) - - with ThreadPoolExecutor(max_workers=len(aggs_by_authority)) as executor: - aggs = dict(executor.map(summary, aggs_by_authority)) - - aggs = { - agg_name: aggs[entity_type][agg_name] - for entity_type, summary_fields in aggs_by_authority.items() - for agg_name in summary_fields - } - - response = response_stage.process_response(aggs) - return response - - def _summary(self, - *, - catalog: CatalogName, - entity_type: str, - filters: Filters - ) -> MutableJSON: - plugin = self.metadata_plugin(catalog) - chain = self.create_chain(catalog=catalog, - entity_type=entity_type, - filters=filters, - post_filter=False, - document_slice=None) - chain = ToDictStage(service=self, - catalog=catalog, - entity_type=entity_type).wrap(chain) - chain = plugin.summary_aggregation_stage.create_and_wrap(chain) - request = chain.prepare_request(self.create_request(catalog, entity_type)) - - response = request.execute(ignore_cache=True) - assert len(response.hits) == 0 - - if config.debug == 2 and log.isEnabledFor(logging.DEBUG): - log.debug('Elasticsearch request: %s', json.dumps(request.to_dict(), indent=4)) - - result = chain.process_response(response) - - return result - - def get_data_file(self, - catalog: CatalogName, - file_uuid: str, - file_version: str | None, - filters: Filters, - ) -> File | None: - """ - Return the inner `files` entity describing the data file with the - given UUID and version. - - :param catalog: the catalog to search in - - :param file_uuid: the UUID of the data file - - :param file_version: the version of the data file, if absent the most - recent version will be returned - - :param filters: parsed filters from the request - - :return: The inner `files` entity or None if the catalog does not - contain information about the specified data file - """ - filters = filters.update({ - 'fileId': {'is': [file_uuid]}, - **( - {'fileVersion': {'is': [file_version]}} - if file_version is not None else - {} - ) - }) - - def _hit_to_doc(hit: Hit) -> JSON: - return self.translate_fields(catalog, hit.to_dict(), forward=False) - - entity_type = 'files' - chain = self.create_chain(catalog=catalog, - entity_type=entity_type, - filters=filters, - post_filter=False, - document_slice=None) - request = self.create_request(catalog, entity_type) - request = chain.prepare_request(request) - - plugin = self.metadata_plugin(catalog) - if file_version is None: - field_path = dotted(plugin.field_mapping['fileVersion']) - request.sort({field_path: dict(order='desc')}) - - # Just need two hits to detect an ambiguous response - request.params(size=2) - - hits = list(map(_hit_to_doc, request.execute().hits)) - - if len(hits) == 0: - return None - elif len(hits) > 1: - # Can't have more than one hit with the same version - assert file_version is None, len(hits) - - file = one(first(hits)['contents']['files']) - file = plugin.file_class.from_hit(file) - if file_version is not None: - assert file_version == file.version - return file - - @property - def always_limit_access(self) -> bool: - return False diff --git a/src/azul/service/source_controller.py b/src/azul/service/source_controller.py deleted file mode 100644 index d8160eaa44..0000000000 --- a/src/azul/service/source_controller.py 
+++ /dev/null @@ -1,94 +0,0 @@ -import logging - -from chalice import ( - TooManyRequestsError, - UnauthorizedError, -) - -from azul import ( - CatalogName, - cached_property, -) -from azul.auth import ( - Authentication, -) -from azul.chalice import ( - BadGatewayError, - ServiceUnavailableError, -) -from azul.http import ( - LimitedTimeoutException, - TooManyRequestsException, -) -from azul.service import ( - Filters, -) -from azul.service.app_controller import ( - ServiceAppController, -) -from azul.service.source_service import ( - SourceService, -) -from azul.types import ( - JSONs, -) - -log = logging.getLogger(__name__) - - -class SourceController(ServiceAppController): - - @cached_property - def _source_service(self) -> SourceService: - return SourceService() - - def list_sources(self, - catalog: CatalogName, - authentication: Authentication | None - ) -> JSONs: - try: - sources = self._source_service.list_sources(catalog, authentication) - except PermissionError: - raise UnauthorizedError - except LimitedTimeoutException as e: - raise ServiceUnavailableError(*e.args) - except TooManyRequestsException as e: - raise TooManyRequestsError(*e.args) - else: - authoritative_source_ids = {source.id for source in sources} - cached_source_ids = self._list_source_ids(catalog, authentication) - # For optimized performance, the cache may include source IDs that - # are accessible but are not configured for indexing. Therefore, we - # expect the set of actual sources to be a subset of the cached - # sources. - diff = authoritative_source_ids - cached_source_ids - if diff: - log.debug(diff) - raise BadGatewayError('Inconsistent response from repository') - return [ - {'sourceId': source.id, 'sourceSpec': str(source.spec)} - for source in sources - ] - - def _list_source_ids(self, - catalog: CatalogName, - authentication: Authentication | None - ) -> set[str]: - try: - source_ids = self._source_service.list_source_ids(catalog, authentication) - except PermissionError: - raise UnauthorizedError - except LimitedTimeoutException as e: - raise ServiceUnavailableError(*e.args) - except TooManyRequestsException as e: - raise TooManyRequestsError(*e.args) - else: - return source_ids - - def get_filters(self, - catalog: CatalogName, - authentication: Authentication | None, - filters: str | None = None - ) -> Filters: - return Filters(explicit=self._parse_filters(filters), - source_ids=self._list_source_ids(catalog, authentication)) diff --git a/src/azul/service/source_service.py b/src/azul/service/source_service.py deleted file mode 100644 index cd3c88ed6f..0000000000 --- a/src/azul/service/source_service.py +++ /dev/null @@ -1,123 +0,0 @@ -import json -import logging -from time import ( - time, -) -from typing import ( - Iterable, -) - -from azul import ( - CatalogName, - cache, - config, -) -from azul.auth import ( - Authentication, -) -from azul.deployment import ( - aws, -) -from azul.indexer import ( - SourceRef, -) -from azul.plugins import ( - RepositoryPlugin, -) -from azul.types import ( - AnyJSON, -) - -log = logging.getLogger(__name__) - - -class CacheMiss(Exception): - pass - - -class NotFound(CacheMiss): - - def __init__(self, key: str): - super().__init__(f'Key not found: {key!r}') - - -class Expired(CacheMiss): - - def __init__(self, key: str): - super().__init__(f'Entry for key {key!r} is expired') - - -class SourceService: - - @cache - def _repository_plugin(self, catalog: CatalogName) -> RepositoryPlugin: - return RepositoryPlugin.load(catalog).create(catalog) - - def 
list_source_ids(self, - catalog: CatalogName, - authentication: Authentication | None - ) -> set[str]: - plugin = self._repository_plugin(catalog) - - cache_key = ( - catalog, - '' if authentication is None else authentication.identity() - ) - joiner = ':' - assert not any(joiner in c for c in cache_key), cache_key - cache_key = joiner.join(cache_key) - try: - source_ids = set(self._get(cache_key)) - except CacheMiss: - source_ids = plugin.list_source_ids(authentication) - self._put(cache_key, list(source_ids)) - return source_ids - - def list_sources(self, - catalog: CatalogName, - authentication: Authentication | None - ) -> Iterable[SourceRef]: - return self._repository_plugin(catalog).list_sources(authentication) - - table_name = config.dynamo_sources_cache_table_name - - key_attribute = 'identity' - value_attribute = 'sources' - ttl_attribute = 'expiration' - - # Timespan in seconds that sources persist in the cache - expiration = 60 - - @property - def _dynamodb(self): - return aws.dynamodb - - def _get(self, key: str) -> list[AnyJSON]: - response = self._dynamodb.get_item(TableName=self.table_name, - Key={self.key_attribute: {'S': key}}, - ProjectionExpression=','.join([self.value_attribute, self.ttl_attribute])) - try: - result = response['Item'] - except KeyError: - raise NotFound(key) - else: - # Items can persist in DynamoDB after they are marked as expired - # https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/howitworks-ttl.html - if int(result[self.ttl_attribute]['N']) < self._now(): - raise Expired(key) - else: - return json.loads(result[self.value_attribute]['S']) - - def _put(self, key: str, sources: list[AnyJSON]) -> None: - item = { - self.key_attribute: {'S': key}, - self.value_attribute: {'S': json.dumps(sources)}, - self.ttl_attribute: { - 'N': str(self._now() + self.expiration) - } - } - self._dynamodb.put_item(TableName=self.table_name, - Item=item) - - def _now(self) -> int: - return int(time()) diff --git a/src/azul/service/storage_service.py b/src/azul/service/storage_service.py deleted file mode 100644 index ad9f1822f0..0000000000 --- a/src/azul/service/storage_service.py +++ /dev/null @@ -1,372 +0,0 @@ -from __future__ import ( - annotations, -) - -from collections.abc import ( - Mapping, - Sequence, -) -from dataclasses import ( - dataclass, -) -from datetime import ( - datetime, - timedelta, - timezone, -) -from email.utils import ( - parsedate_to_datetime, -) -from logging import ( - getLogger, -) -import time -from typing import ( - Collection, - IO, - TYPE_CHECKING, -) -from urllib.parse import ( - urlencode, -) - -import botocore -import botocore.exceptions -from botocore.response import ( - StreamingBody, -) -from more_itertools import ( - chunked, -) -from werkzeug.http import ( - parse_dict_header, -) - -from azul import ( - R, -) -from azul.collections import ( - OrderedSet, -) -from azul.deployment import ( - aws, -) - -if TYPE_CHECKING: - from mypy_boto3_s3.client import ( - S3Client, - ) - from mypy_boto3_s3.service_resource import ( - MultipartUpload, - ) - from mypy_boto3_s3.type_defs import ( - HeadObjectOutputTypeDef, - ) - -log = getLogger(__name__) - -# 5 MB; see https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html -AWS_S3_DEFAULT_MINIMUM_PART_SIZE = 5242880 - -MULTIPART_UPLOAD_MAX_WORKERS = 4 - -# The amount of pending tasks that can be queued for execution. A value of 0 -# allows no tasks to be queued, only running tasks allowed in the thread pool. 
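-# As a rough, illustrative bound: with four workers and four pending parts,
-# at most eight part buffers are alive at once, or about 40 MiB at the 5 MiB
-# minimum part size defined above; actual parts may be larger.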
-MULTIPART_UPLOAD_MAX_PENDING_PARTS = 4 - -Tagging = Mapping[str, str] - - -class StorageObjectNotFound(Exception): - pass - - -class StorageObjectExists(Exception): - pass - - -class StorageService: - - def __init__(self, bucket_name: str | None = None): - if bucket_name is None: - bucket_name = aws.storage_bucket - self.bucket_name = bucket_name - - @property - def _s3(self) -> S3Client: - return aws.s3 - - def head(self, object_key: str) -> HeadObjectOutputTypeDef: - try: - return self._s3.head_object(Bucket=self.bucket_name, - Key=object_key) - except self._s3.exceptions.ClientError as e: - if int(e.response['Error']['Code']) == 404: - raise StorageObjectNotFound(object_key) - else: - raise e - - def get(self, object_key: str) -> bytes: - try: - response = self._s3.get_object(Bucket=self.bucket_name, - Key=object_key) - except self._s3.exceptions.NoSuchKey: - raise StorageObjectNotFound(object_key) - else: - return response['Body'].read() - - def put(self, - object_key: str, - data: bytes, - content_type: str | None = None, - tagging: Tagging | None = None, - *, - overwrite: bool = True, - **kwargs): - try: - self._s3.put_object(Bucket=self.bucket_name, - Key=object_key, - Body=data, - **self._object_creation_kwargs(content_type=content_type, - tagging=tagging, - overwrite=overwrite), - **kwargs) - except botocore.exceptions.ClientError as e: - self._handle_overwrite(e, object_key) - - def delete(self, keys: Collection[str], batch_size: int = 1000) -> None: - assert batch_size <= 1000, R('Batch size must <= 1000', batch_size) - num_keys = len(keys) - for batch in chunked(keys, batch_size): - log.debug('Deleting batch of objects: %r', batch) - self._s3.delete_objects(Bucket=self.bucket_name, - Delete={ - 'Objects': [ - {'Key': key} - for key in batch - ] - }) - log.info('Deleted %d objects overall', num_keys) - - def list(self, prefix: str) -> OrderedSet[str]: - keys, num_keys = OrderedSet(), 0 - paginator = self._s3.get_paginator('list_objects_v2') - for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix): - contents = page.get('Contents', ()) - num_keys += len(contents) - keys.update(object['Key'] for object in contents) - assert len(keys) == num_keys, R('Got duplicate keys from S3') - return keys - - def create_multipart_upload(self, - object_key: str, - content_type: str | None = None, - tagging: Tagging | None = None) -> MultipartUpload: - kwargs = self._object_creation_kwargs(content_type=content_type, - tagging=tagging) - return self._create_multipart_upload(object_key=object_key, **kwargs) - - def _create_multipart_upload(self, *, object_key, **kwargs) -> MultipartUpload: - api_response = self._s3.create_multipart_upload(Bucket=self.bucket_name, - Key=object_key, - **kwargs) - upload_id = api_response['UploadId'] - return self.load_multipart_upload(object_key, upload_id) - - def load_multipart_upload(self, object_key, upload_id) -> MultipartUpload: - s3 = aws.s3_resource - return s3.MultipartUpload(self.bucket_name, object_key, upload_id) - - def upload_multipart_part(self, - buffer: str | bytes | IO | StreamingBody, - part_number: int, - upload: MultipartUpload - ) -> str: - return upload.Part(part_number).upload(Body=buffer)['ETag'] - - def complete_multipart_upload(self, - upload: MultipartUpload, - etags: Sequence[str], - *, - overwrite: bool = True, - ) -> None: - parts = [ - { - 'PartNumber': index + 1, - 'ETag': etag - } - for index, etag in enumerate(etags) - ] - try: - upload.complete(MultipartUpload={'Parts': parts}, - 
**self._object_creation_kwargs(overwrite=overwrite)) - except botocore.exceptions.ClientError as e: - self._handle_overwrite(e, upload.object_key) - - def upload(self, - file_path: str, - object_key: str, - content_type: str | None = None, - tagging: Tagging | None = None): - self._s3.upload_file(Filename=file_path, - Bucket=self.bucket_name, - Key=object_key, - ExtraArgs=self._object_creation_kwargs(content_type=content_type)) - # upload_file doesn't support tags so we need to make a separate request - # https://stackoverflow.com/a/56351011/7830612 - if tagging: - self.put_object_tagging(object_key, tagging) - - def _object_creation_kwargs(self, - *, - content_type: str | None = None, - tagging: Tagging | None = None, - overwrite: bool = True - ) -> Mapping[str, str]: - kwargs = {} - if content_type is not None: - kwargs['ContentType'] = content_type - if tagging is not None: - kwargs['Tagging'] = urlencode(tagging) - if overwrite is False: - kwargs['IfNoneMatch'] = '*' - return kwargs - - def get_presigned_url(self, - key: str, - *, - file_name: str | None = None, - content_type: str | None = None - ) -> str: - """ - Return a pre-signed URL to the given key. - - :param key: The key of the S3 object whose content a request to the - signed URL will return - - :param file_name: the file name to be returned as part of a - Content-Disposition header in the response to a - request to the signed URL. If None, no such header - will be present in the response. - - :param content_type: the value for the Content-Type header in the - response to a request to the signed URL. If None, - the value stored in the object's metadata will be - used. - """ - assert file_name is None or '"' not in file_name, file_name - return self._s3.generate_presigned_url( - ClientMethod=self._s3.get_object.__name__, - Params={ - 'Bucket': self.bucket_name, - 'Key': key, - **( - {} - if file_name is None else - {'ResponseContentDisposition': f'attachment;filename="{file_name}"'} - ), - **( - {} - if content_type is None else - {'ResponseContentType': content_type} - ) - }) - - def put_object_tagging(self, object_key: str, tagging: Tagging = None): - deadline = time.time() + 60 - tagging = {'TagSet': [{'Key': k, 'Value': v} for k, v in tagging.items()]} - log.info('Tagging object %r with %r', object_key, tagging) - while True: - try: - self._s3.put_object_tagging(Bucket=self.bucket_name, - Key=object_key, - Tagging=tagging) - except self._s3.exceptions.NoSuchKey: - if time.time() > deadline: - log.error('Unable to tag %s on object.', tagging) - raise - else: - log.warning('Object key %s is not found. Retrying in 5 s.', object_key) - time.sleep(5) - else: - break - - def get_object_tagging(self, object_key: str) -> Tagging: - response = self._s3.get_object_tagging(Bucket=self.bucket_name, Key=object_key) - tagging = {tag['Key']: tag['Value'] for tag in response['TagSet']} - return tagging - - def time_until_object_expires(self, object_key: str, expiration: int) -> float: - """ - The time, in seconds, before the object at the given key will expire. - - :param object_key: The key of the object - - :param expiration: the number of days between the last write of an - object and its expected expiration by a bucket - lifecycle rule. This parameter is solely used to - verify the return value. 
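-
-        For example, with expiration=30 and an object last modified at noon
-        on January 1st, the expected expiry computed below is midnight UTC on
-        February 1st, because AWS rounds the expiration up to the next
-        midnight UTC (dates illustrative).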
- """ - response = self.head(object_key) - return self._time_until_object_expires(response, expiration) - - def _time_until_object_expires(self, - head_response: HeadObjectOutputTypeDef, - expiration: int - ) -> float: - now = datetime.now(timezone.utc) - # Example header value - # expiry-date="Fri, 21 Dec 2012 00:00:00 GMT", rule-id="Rule for testfile.txt" - expiration_header = parse_dict_header(head_response['Expiration']) - expiry = parsedate_to_datetime(expiration_header['expiry-date']) - time_left = (expiry - now).total_seconds() - # Verify the 'Expiration' value is what is expected given the - # 'LastModified' value, the number of days before expiration, and that - # AWS rounds the expiration up to midnight UTC. - last_modified = head_response['LastModified'] - last_modified_floor = last_modified.replace(hour=0, - minute=0, - second=0, - microsecond=0) - if last_modified != last_modified_floor: - expiration += 1 - expected_expiry = last_modified_floor + timedelta(days=expiration) - if expiry == expected_expiry: - log.debug('Object expires in %s seconds, on %s', - time_left, expiry) - else: - log.error('Actual object expiration (%s) does not match expected value (%s)', - expiration_header, expected_expiry) - return time_left - - def _handle_overwrite(self, - exception: botocore.exceptions.ClientError, - object_key: str - ): - error = exception.response['Error'] - # `Condition` is only present when using conditional writes - code, condition = error['Code'], error.get('Condition') - if code == 'PreconditionFailed' and condition == 'If-None-Match': - raise StorageObjectExists(object_key) - else: - raise exception - - -@dataclass -class Part: - etag: str | None # If ETag is defined, the content is already pushed to S3. - part_number: int - content: bytes - - @property - def already_uploaded(self): - return self.etag is not None - - def to_dict(self): - return dict(PartNumber=self.part_number, ETag=self.etag) - - -class MultipartUploadError(RuntimeError): - - def __init__(self, bucket_name, object_key): - super(MultipartUploadError, self).__init__(f'{bucket_name}/{object_key}') diff --git a/src/azul/template/__init__.py b/src/azul/template/__init__.py deleted file mode 100644 index 1f39030b1d..0000000000 --- a/src/azul/template/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -from contextlib import ( - contextmanager, -) -import json -import os -import sys -import tempfile -from typing import ( - IO, -) - -from azul.types import ( - AnyJSON, -) - - -def emit(json_doc: AnyJSON | None): - with emit_text(remove=json_doc is None) as f: - json.dump(json_doc, f, indent=4) - - -@contextmanager -def emit_text(*, remove: bool = False): - path = sys.argv[1] - f: IO[str] - if remove: - try: - os.unlink(path) - except FileNotFoundError: - pass - else: - print(f'Removed {path}') - with open('/dev/null', 'a') as f: - yield f - else: - f = tempfile.NamedTemporaryFile(mode='w+', - dir=os.path.dirname(path), - encoding='utf-8', delete=False) - try: - yield f - except BaseException: - os.unlink(f.name) - raise - else: - print(f'Creating {path}') - os.rename(f.name, path) - finally: - f.close() diff --git a/src/azul/template/__main__.py b/src/azul/template/__main__.py deleted file mode 100644 index e32723781d..0000000000 --- a/src/azul/template/__main__.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Usage: python -m azul.template foo.json.template.py foo.json - -Same as ``python foo.json.template.py foo.json`` but configures script logging -""" -import logging -import sys - -from azul.logging import ( - 
    configure_script_logging,
-)
-from azul.modules import (
-    load_module,
-)
-
-# This module is the real __main__
-#
-assert __name__ == '__main__'
-
-# Even though we don't directly use the logger here, we need to instantiate and
-# configure it. If we called configure_script_logging() without passing the
-# logger, any logger instantiated by the template script would not be considered
-# an Azul logger
-#
-log = logging.getLogger(__name__)
-configure_script_logging(log)
-
-# Shift the arguments so that the output file name becomes sys.argv[1] as
-# expected by the emit… functions in the sibling __init__.py
-#
-template = sys.argv.pop(1)
-
-# Invoke the template script and pretend that its module name is also __main__
-#
-load_module(template, __name__)
diff --git a/src/azul/terra.py b/src/azul/terra.py
deleted file mode 100644
index 74efa35b00..0000000000
--- a/src/azul/terra.py
+++ /dev/null
@@ -1,714 +0,0 @@
-from abc import (
-    ABCMeta,
-    abstractmethod,
-)
-from collections.abc import (
-    Sequence,
-)
-from enum import (
-    StrEnum,
-    auto,
-)
-import json
-import logging
-from time import (
-    sleep,
-)
-from typing import (
-    ClassVar,
-    Optional,
-    Self,
-)
-
-import attrs
-from chalice import (
-    UnauthorizedError,
-)
-from furl import (
-    furl,
-)
-from google.api_core.exceptions import (
-    BadRequest,
-    Forbidden,
-    InternalServerError,
-    ServiceUnavailable,
-)
-from google.auth.transport.requests import (
-    Request,
-)
-from google.cloud import (
-    bigquery,
-)
-from google.cloud.bigquery import (
-    DatasetReference,
-    QueryJob,
-    QueryJobConfig,
-    QueryPriority,
-)
-from google.cloud.bigquery.table import (
-    RowIterator,
-)
-from more_itertools import (
-    one,
-)
-import urllib3
-import urllib3.exceptions
-import urllib3.request
-import urllib3.response
-
-from azul import (
-    Config,
-    R,
-    RequirementError,
-    cache,
-    config,
-    mutable_furl,
-    reject,
-    require,
-)
-from azul.auth import (
-    OAuth2,
-)
-from azul.bigquery import (
-    BigQueryRows,
-)
-from azul.deployment import (
-    aws,
-)
-from azul.drs import (
-    DRSClient,
-)
-from azul.http import (
-    LimitedRetryHttpClient,
-    LimitedTimeoutException,
-    Propagate429HttpClient,
-)
-from azul.indexer import (
-    SourceRef,
-    SourceSpec,
-)
-from azul.oauth2 import (
-    CredentialsProvider,
-    OAuth2Client,
-    ServiceAccountCredentials,
-    TokenCredentials,
-)
-from azul.strings import (
-    trunc_ellipses,
-)
-from azul.types import (
-    JSON,
-    MutableJSON,
-    json_dict,
-    json_int,
-    json_list,
-    json_mapping,
-    json_str,
-)
-
-log = logging.getLogger(__name__)
-
-
-@attrs.frozen(kw_only=True)
-class TDRSourceSpec(SourceSpec):
-    class Type(StrEnum):
-        bigquery = auto()
-        parquet = auto()
-
-    class Domain(StrEnum):
-        gcp = auto()
-        azure = auto()
-
-    type: Type
-    domain: Domain
-    subdomain: str
-    name: str
-
-    @classmethod
-    def parse(cls, spec: str) -> Self:
-        """
-        Construct an instance from its string representation, using the syntax
-        'tdr:{type}:{domain}:{subdomain}:{name}:{prefix}', where prefix is
-        either the empty string or '{common_prefix}/{partition_prefix}'.
-
-        >>> s = TDRSourceSpec.parse('tdr:bigquery:gcp:foo:bar:/0')
-        >>> s # doctest: +NORMALIZE_WHITESPACE
-        TDRSourceSpec(prefix=Prefix(common='', partition=0),
-                      type=<Type.bigquery: 'bigquery'>,
-                      domain=<Domain.gcp: 'gcp'>,
-                      subdomain='foo',
-                      name='bar')
-
-        >>> str(s)
-        'tdr:bigquery:gcp:foo:bar:/0'
-
-        >>> TDRSourceSpec.parse('tdr:spam:gcp:foo:bar:/0')
-        Traceback (most recent call last):
-        ...
-        ValueError: 'spam' is not a valid TDRSourceSpec.Type
-
-        >>> TDRSourceSpec.parse('tdr:bigquery:eggs:foo:bar:/0')
-        Traceback (most recent call last):
-        ...
-        ValueError: 'eggs' is not a valid TDRSourceSpec.Domain
-
-        If any :'s are missing, the last part will be interpreted as the prefix
-
-        >>> TDRSourceSpec.parse('tdr:bigquery:gcp:foo:bar')
-        Traceback (most recent call last):
-        ...
-        ValueError: ('Missing partition prefix length', 'bar')
-
-        >>> TDRSourceSpec.parse('tdr:bigquery:gcp:foo:bar:')
-        ... # doctest: +NORMALIZE_WHITESPACE
-        TDRSourceSpec(prefix=None,
-                      type=<Type.bigquery: 'bigquery'>,
-                      domain=<Domain.gcp: 'gcp'>,
-                      subdomain='foo',
-                      name='bar')
-
-        >>> TDRSourceSpec.parse('tdr:bigquery:gcp:foo:aaa')
-        Traceback (most recent call last):
-        ...
-        ValueError: ('Missing partition prefix length', 'aaa')
-
-        >>> TDRSourceSpec.parse('tdr:bigquery:gcp:foo:bar:n32/0')
-        Traceback (most recent call last):
-        ...
-        azul.uuids.InvalidUUIDPrefixError: 'n32' is not a valid UUID prefix.
-        """
-        rest, prefix = cls._parse(spec)
-        # BigQuery (and by extension the TDR) does not allow : or / in dataset names
-        service, type, domain, subdomain, name = rest.split(':')
-        assert service == 'tdr', service
-        type = cls.Type(type)
-        reject(type == cls.Type.parquet, 'Parquet sources are not yet supported')
-        domain = cls.Domain(domain)
-        reject(domain == cls.Domain.azure, 'Azure sources are not yet supported')
-        self = cls(prefix=prefix,
-                   type=type,
-                   domain=domain,
-                   subdomain=subdomain,
-                   name=name)
-        assert spec == str(self), spec
-        return self
-
-    def __str__(self) -> str:
-        """
-        The inverse of :meth:`parse`.
-
-        >>> s = 'tdr:bigquery:gcp:foo:bar:/0'
-        >>> s == str(TDRSourceSpec.parse(s))
-        True
-
-        >>> s = 'tdr:bigquery:gcp:foo:bar:22/0'
-        >>> s == str(TDRSourceSpec.parse(s))
-        True
-
-        >>> s = 'tdr:bigquery:gcp:foo:bar:22/2'
-        >>> s == str(TDRSourceSpec.parse(s))
-        True
-        """
-        return ':'.join([
-            'tdr',
-            self.type.value,
-            self.domain.value,
-            self.subdomain,
-            self.name,
-            self._prefix_str
-        ])
-
-    def qualify_table(self, table_name: str) -> str:
-        return '.'.join((self.subdomain, self.name, table_name))
-
-
-class TDRSourceRef(SourceRef[TDRSourceSpec]):
-    pass
-
-
-class TerraCredentialsProvider(CredentialsProvider, metaclass=ABCMeta):
-
-    @abstractmethod
-    def insufficient_access(self, resource: str) -> Exception:
-        raise NotImplementedError
-
-
-@attrs.frozen(kw_only=True)
-class ServiceAccountCredentialsProvider(TerraCredentialsProvider):
-    service_account: Config.ServiceAccount
-
-    def oauth2_scopes(self) -> Sequence[str]:
-        # Minimum scopes required for SAM registration
-        return [
-            'https://www.googleapis.com/auth/userinfo.email',
-            'openid'
-        ]
-
-    @cache
-    def scoped_credentials(self) -> ServiceAccountCredentials:
-        with aws.service_account_credentials(self.service_account) as file_name:
-            credentials = ServiceAccountCredentials.from_service_account_file(file_name)
-        credentials = credentials.with_scopes(self.oauth2_scopes())
-        credentials.refresh(Request())  # Obtain access token
-        return credentials
-
-    def insufficient_access(self, resource: str):
-        return RequirementError(
-            f'The service account (SA) {self.scoped_credentials().service_account_email!r} is not '
-            f'authorized to access {resource} or that resource does not exist. Make sure '
-            f'that it exists, that the SA is registered with SAM and has been granted read '
-            f'access to the resource.'
-        )
-
-
-class IndexerServiceAccountCredentialsProvider(ServiceAccountCredentialsProvider):
-
-    def oauth2_scopes(self) -> Sequence[str]:
-        return [
-            *super().oauth2_scopes(),
-            'https://www.googleapis.com/auth/devstorage.read_only',
-            'https://www.googleapis.com/auth/bigquery.readonly'
-        ]
-
-
-class UserCredentialsProvider(TerraCredentialsProvider):
-
-    def __init__(self, authentication: OAuth2):
-        self.token = authentication.identity()
-
-    def oauth2_scopes(self) -> Sequence[str]:
-        return ['https://www.googleapis.com/auth/userinfo.email']
-
-    @cache
-    def scoped_credentials(self) -> TokenCredentials:
-        # FIXME: this assumes the user has selected all required scopes.
-        return TokenCredentials(self.token, scopes=self.oauth2_scopes())
-
-    def insufficient_access(self, resource: str):
-        scopes = ', '.join(self.oauth2_scopes())
-        return UnauthorizedError(
-            f'The current user is not authorized to access {resource} or that '
-            f'resource does not exist. Make sure that it exists, that the user '
-            f'is registered with Terra, that the provided access token is not '
-            f'expired, and that the following access scopes were granted when '
-            f'authenticating: {scopes}.'
-        )
-
-
-class TerraClientException(Exception):
-    pass
-
-
-class TerraStatusException(TerraClientException):
-
-    def __init__(self, url: furl, response: urllib3.response.HTTPResponse):
-        super().__init__(f'Unexpected response from {url}',
-                         response.status, response.data)
-
-
-class TerraNameConflictException(TerraClientException):
-
-    def __init__(self, url: furl, source_name: str, response_json: JSON):
-        super().__init__(f'More than one source named {source_name!r}',
-                         str(url), response_json)
-
-
-class TerraConcurrentModificationException(TerraClientException):
-
-    def __init__(self) -> None:
-        super().__init__('Snapshot listing changed while we were paging through it')
-
-
-@attrs.frozen(kw_only=True)
-class TerraClient(OAuth2Client):
-    """
-    A client to a service in the Broad Institute's Terra ecosystem.
-    """
-    credentials_provider: TerraCredentialsProvider
-
-    def _create_http_client(self) -> urllib3.request.RequestMethods:
-        return Propagate429HttpClient(
-            LimitedRetryHttpClient(
-                super()._create_http_client()
-            )
-        )
-
-    def _request(self,
-                 method: str,
-                 url: furl,
-                 *,
-                 headers=None,
-                 body=None
-                 ) -> urllib3.HTTPResponse:
-        response = self._http_client.request(method,
-                                             str(url),
-                                             headers=headers,
-                                             body=body)
-
-        assert isinstance(response, urllib3.HTTPResponse)
-        header_name = 'WWW-Authenticate'
-        try:
-            header_value = response.headers[header_name]
-        except KeyError:
-            pass
-        else:
-            log.warning('_request(…) -> %r: %r', header_name, header_value)
-        return response
-
-
-class SAMClient(TerraClient):
-    """
-    A client to Broad's SAM (https://github.com/broadinstitute/sam). TDR uses
-    SAM for authorization, and SAM uses Google OAuth 2.0 for authentication.
-    """
-
-    def register_with_sam(self) -> None:
-        """
-        Register the current service account with SAM.
- - https://github.com/DataBiosphere/jade-data-repo/blob/develop/docs/register-sa-with-sam.md - """ - email = self.service_account_credentials.service_account_email - url = config.sam_service_url.set(path='/register/user/v1') - response = self._request('POST', url, body='') - if response.status == 201: - log.info('Google service account %r successfully registered with SAM.', email) - elif response.status == 409: - log.info('Google service account %r previously registered with SAM.', email) - elif response.status == 500 and b'Cannot update googleSubjectId' in response.data: - raise RuntimeError( - 'Unable to register service account. SAM does not allow re-registration of a ' - 'new service account whose name matches that of another previously registered ' - 'service account. Please refer to the troubleshooting section of the README.', - email - ) - else: - raise TerraStatusException(url, response) - - def is_registered(self) -> bool: - """ - Check whether the user or service account associated with the current - client's credentials is registered with SAM. - """ - endpoint = config.sam_service_url.set(path='/register/user/v1') - response = self._request('GET', endpoint) - auth_header = response.headers.get('WWW-Authenticate') - if response.status == 200: - return True - elif response.status == 404: - return False - elif response.status == 401 and auth_header and 'invalid_token' in auth_header: - raise PermissionError('The provided authentication is invalid') - else: - raise TerraStatusException(endpoint, response) - - def _insufficient_access(self, resource: str) -> Exception: - return self.credentials_provider.insufficient_access(resource) - - -class TDRClient(SAMClient): - """ - A client for the Broad Institute's Terra Data Repository aka "Jade". - """ - - @cache - def lookup_source(self, source_spec: TDRSourceSpec) -> str: - """ - Validate that the repository's reported values for the snapshot's Google - project name and storage location match our expectations, and return the - snapshot's UUID. 
- """ - source = self._lookup_source(source_spec) - actual_project = source['dataProject'] - require(actual_project == source_spec.subdomain, - 'Actual Google project of TDR source differs from configured one', - actual_project, source_spec.subdomain) - storage = one( - resource - for resource in map(json_dict, json_list(source['storage'])) - if json_str(resource['cloudResource']) == 'bigquery' - ) - actual_location = json_str(storage['region']) - # Uppercase is standard for multi-regions in the documentation but TDR - # returns 'us' in lowercase - require(actual_location.lower() == config.tdr_source_location.lower(), - 'Actual storage location of TDR source differs from configured one', - actual_location, config.tdr_source_location) - return json_str(source['id']) - - def _retrieve_source(self, source: TDRSourceRef) -> MutableJSON: - endpoint = self._repository_endpoint('snapshots', source.id) - response = self._request('GET', endpoint) - response = self._check_response(endpoint, response) - require(source.spec.name == response['name'], - 'Source name changed unexpectedly', source, response) - return response - - def _lookup_source(self, source: TDRSourceSpec) -> MutableJSON: - endpoint = self._repository_endpoint('snapshots') - endpoint.set(args=dict(filter=source.name, limit='2')) - response = self._request('GET', endpoint) - response = self._check_response(endpoint, response) - total = json_int(response['filteredTotal']) - if total == 0: - raise self._insufficient_access(str(endpoint)) - elif total == 1: - return json_dict(one(json_list(response['items']))) - else: - raise TerraNameConflictException(endpoint, source.name, response) - - def check_bigquery_access(self, source: TDRSourceSpec): - """ - Verify that the client is authorized to read from TDR BigQuery tables. - """ - resource = f'BigQuery dataset {source.name!r} in Google Cloud project {source.subdomain!r}' - try: - self.run_sql(f''' - SELECT * - FROM `{source.subdomain}.{source.name}.INFORMATION_SCHEMA.TABLES` - LIMIT 1 - ''') - except Forbidden: - raise self._insufficient_access(resource) - else: - log.info('TDR client is authorized to access tables in %s', resource) - - @cache - def _bigquery(self, project: str) -> bigquery.Client: - # We get a false warning from PyCharm here, probably because of - # - # https://youtrack.jetbrains.com/issue/PY-23400/regression-PEP484-type-annotations-in-docstrings-nearly-completely-broken - # - # Google uses the docstring syntax to annotate types in its BQ client. 
- # - # noinspection PyTypeChecker - return bigquery.Client(project=project, credentials=self.credentials) - - class _EmptySQLResult(Exception): - pass - - def run_sql(self, query: str) -> BigQueryRows: - bigquery = self._bigquery(self.service_account_credentials.project_id) - if log.isEnabledFor(logging.DEBUG): - log.debug('Query (%r characters total): %r', - len(query), self._trunc_query(query)) - job: QueryJob - if config.bigquery_batch_mode: - job_config = QueryJobConfig(priority=QueryPriority.BATCH) - job = bigquery.query(query, job_config=job_config) - result = job.result() - job_info = self._job_info(job, result) - else: - delays = (10, 20, 40, 80) - assert sum(delays) < config.contribution_lambda_timeout(retry=False) - for attempt, delay in enumerate((*delays, None)): - job = bigquery.query(query) - try: - result = job.result() - job_info = self._job_info(job, result) - if not self._job_has_result(job_info): - raise self._EmptySQLResult - except ( - BadRequest, - Forbidden, - InternalServerError, - ServiceUnavailable, - self._EmptySQLResult - ) as e: - if delay is None: - raise e - elif isinstance(e, Forbidden) and 'Exceeded rate limits' not in e.message: - raise e - elif (isinstance(e, BadRequest) - and 'project does not have the reservation in the data region' not in e.message): - raise e - else: - log.warning('BigQuery job error during attempt %i/%i. Retrying in %is.', - attempt + 1, len(delays) + 1, delay, exc_info=e) - sleep(delay) - else: - break - else: - assert False - if log.isEnabledFor(logging.DEBUG): - log.debug('Job info: %s', json.dumps(job_info)) - return result - - def list_tables(self, source: TDRSourceSpec) -> set[str]: - bigquery = self._bigquery(self.service_account_credentials.project_id) - ref = DatasetReference(project=source.subdomain, dataset_id=source.name) - return { - table.to_api_repr()['tableReference']['tableId'] - for table in bigquery.list_tables(ref) - } - - def _trunc_query(self, query: str) -> str: - return trunc_ellipses(query, 2048) - - def _job_info(self, job: QueryJob, result: RowIterator) -> JSON: - # noinspection PyProtectedMember - stats = job._properties['statistics']['query'] - if config.debug < 2: - ignore = ('referencedTables', 'statementType', 'queryPlan') - stats = {k: v for k, v in stats.items() if k not in ignore} - return { - 'job_id': job.job_id, - 'total_rows': result.total_rows, - 'stats': stats, - 'query': self._trunc_query(job.query) - } - - def _job_has_result(self, job_info: JSON) -> bool: - # In order to detect when a BigQuery job silently fails (i.e. returns - # no rows when there should be some), we check for an expected field in - # the jobs stats that we have observed missing in prior silent failures. 
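-        # A successful job's stats would typically contain an entry like,
-        # e.g., 'totalBytesProcessed': '1048576', whereas in the silent
-        # failures we observed, that field was absent altogether.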
- return 'totalBytesProcessed' in json_mapping(job_info['stats']) - - def _repository_endpoint(self, *path: str) -> mutable_furl: - return config.tdr_service_url.set(path=('api', 'repository', 'v1', *path)) - - def _duos_endpoint(self, *path: str) -> mutable_furl: - url = config.duos_service_url - assert url is not None - return url.set(path=('api', *path)) - - def _check_response(self, - endpoint: furl, - response: urllib3.HTTPResponse - ) -> MutableJSON: - if response.status == 200: - return json.loads(response.data) - # FIXME: Azul sometimes conflates 401 and 403 - # https://github.com/DataBiosphere/azul/issues/4463 - elif response.status in (401, 403): - raise self._insufficient_access(str(endpoint)) - else: - raise TerraStatusException(endpoint, response) - - page_size: ClassVar[int] = 1000 - - def snapshot_ids(self) -> set[str]: - """ - List the IDs of the TDR snapshots accessible to the current credentials. - Much faster than listing the snapshots' names. - """ - endpoint = self._repository_endpoint('snapshots', 'roleMap') - response = self._request('GET', endpoint) - response = self._check_response(endpoint, response) - return set(json_dict(response['roleMap']).keys()) - - def snapshot_names_by_id(self, - *, - filter: Optional[str] = None - ) -> dict[str, str]: - """ - List the TDR snapshots accessible to the current credentials. - - :param filter: Unless None, a string that must occur in the description - or name of the snapshots to be listed - """ - # For reference: https://github.com/DataBiosphere/jade-data-repo/blob - # /22ff5c57d46db42c874639e1ffa6ad833c51e29f - # /src/main/java/bio/terra/service/snapshot/SnapshotDao.java#L550 - # - # The creation of a snapshot is only one of the two ways a snapshot is - # added to the list. The other way is making an existing snapshot - # accessible. Sorting by creation date only defends against the first - # scenario, not the second. Also note that as we page through - # snapshots, a snapshot we already retrieved might be removed and - # another one added. If the added one precedes the current page, we - # won't notice at all. - # - endpoint = self._repository_endpoint('snapshots') - snapshots = {} - before = 0 - while True: - args = dict(offset=before, - limit=self.page_size, - sort='created_date', - direction='asc') - if filter is not None: - args['filter'] = filter - endpoint.set(args=args) - response = self._request('GET', endpoint) - response = self._check_response(endpoint, response) - snapshots.update({ - json_str(snapshot['id']): json_str(snapshot['name']) - for snapshot in map(json_dict, json_list(response['items'])) - }) - after = len(snapshots) - total = json_int(response['filteredTotal']) - if after == total: - break - elif after > total or after == before: - # Something is off if we got more snapshots than reported by TDR - # or if there was no progress even though we got fewer than that. 
- raise TerraConcurrentModificationException() - before = after - return snapshots - - @classmethod - def for_indexer(cls) -> Self: - return cls( - credentials_provider=IndexerServiceAccountCredentialsProvider( - service_account=config.ServiceAccount.indexer - ) - ) - - @classmethod - def for_anonymous_user(cls) -> Self: - return cls( - credentials_provider=ServiceAccountCredentialsProvider( - service_account=config.ServiceAccount.public - ) - ) - - @classmethod - def for_registered_user(cls, authentication: OAuth2) -> Self: - self = cls(credentials_provider=UserCredentialsProvider(authentication)) - try: - self.validate() - except AssertionError as e: - if R.caused(e): - log.warning('Invalid credentials', exc_info=e) - raise UnauthorizedError('Invalid credentials') - else: - raise - else: - return self - - def drs_client(self) -> DRSClient: - return DRSClient(http_client=self._http_client) - - def get_duos(self, - source: TDRSourceRef - ) -> tuple[str, MutableJSON] | tuple[None, None]: - """ - Return the DUOS ID and DUOS dataset registration information for the - given TDR snapshot. - """ - body = self._retrieve_source(source) - try: - duos_id = json_str(json_dict(body['duosFirecloudGroup'])['duosId']) - except (KeyError, AssertionError): - log.warning('No DUOS ID available for %r', source.spec) - return None, None - else: - url = self._duos_endpoint('dataset', 'registration', duos_id) - # FIXME: Fail on timeout instead of faking response - # https://github.com/DataBiosphere/azul/issues/7230 - try: - response = self._request('GET', url) - except LimitedTimeoutException: - body = {'studyDescription': '[Description currently not available]'} - return duos_id, body - if response.status == 404: - log.warning('No DUOS dataset registration with ID %r from %r', - duos_id, source.spec) - return None, None - else: - body = self._check_response(url, response) - consent_group = json_dict(one(json_list(body['consentGroups']))) - require(duos_id == json_str(consent_group['datasetIdentifier']), - 'Mismatched identifiers', duos_id, consent_group) - return duos_id, body diff --git a/src/azul/terraform.py b/src/azul/terraform.py deleted file mode 100644 index 8f7b67082b..0000000000 --- a/src/azul/terraform.py +++ /dev/null @@ -1,969 +0,0 @@ -from collections import ( - defaultdict, -) -from collections.abc import ( - Iterable, - Sequence, -) -import gzip -from itertools import ( - chain, -) -import json -import logging -from pathlib import ( - Path, -) -import subprocess -from typing import ( - Mapping, -) - -import attr - -from azul import ( - cache, - cached_property, - config, - require, -) -from azul.chalice import ( - AzulChaliceApp, -) -from azul.deployment import ( - aws, -) -from azul.json import ( - copy_any_json, - copy_json, -) -from azul.template import ( - emit, -) -from azul.types import ( - AnyMutableJSON, - CompositeJSON, - JSON, - JSONs, - MutableJSON, - json_composite, - json_dict, - json_element_dicts, - json_item_dicts, - json_item_mappings, - json_mapping, - json_str, - not_none, -) - -log = logging.getLogger(__name__) - - -@attr.s(auto_attribs=True, kw_only=True, frozen=True) -class TerraformSchema: - versions: JSON - document: JSON - path: Path - - @classmethod - def load(cls, path: Path): - with gzip.open(path, 'rt') as f: - doc = json.load(f) - return cls(versions=doc['versions'], - document=doc['schema'], - path=path) - - def store(self): - with gzip.open(self.path, 'wt') as f: - doc = dict(versions=self.versions, schema=self.document) - json.dump(doc, f) - - -class Terraform: - 
- def taggable_resource_types(self) -> set[str]: - schema = self.schema.document - version = schema['format_version'] - require(version == '1.0', 'Unexpected format version', version) - resources = chain.from_iterable( - provider['resource_schemas'].items() - for provider in schema['provider_schemas'].values() - if 'resource_schemas' in provider - ) - return { - resource_type - for resource_type, resource in resources - if 'tags' in resource['block']['attributes'] - } - - def run(self, *args: str, **kwargs) -> str: - args = ['terraform', *args] - log.info('Running %r', args) - cmd = subprocess.run(args, - check=True, - stdout=subprocess.PIPE, - text=True, - shell=False, - **kwargs) - return cmd.stdout - - def run_state_list(self) -> list[str]: - try: - stdout = self.run('state', 'list', stderr=subprocess.PIPE) - except subprocess.CalledProcessError as e: - if e.returncode == 1 and 'No state file was found' in e.stderr: - log.info('No state file was found, assuming empty list of resources.') - return [] - else: - raise - else: - return stdout.splitlines() - - schema_path = Path(config.project_root) / 'terraform' / '_schema.json.gz' - - @cached_property - def schema(self): - return TerraformSchema.load(self.schema_path) - - def update_schema(self): - schema = self.run('providers', 'schema', '-json') - schema = TerraformSchema(versions=self.versions, - document=json.loads(schema), - path=self.schema_path) - schema.store() - # Reset the cache - try: - # noinspection PyPropertyAccess - del self.schema - except AttributeError: - pass - - @cached_property - def versions(self) -> MutableJSON: - output = self.run('version', '-json') - log.info('Terraform output:\n%s', output) - versions = json.loads(output) - return { - 'terraform': versions['terraform_version'], - 'providers': versions['provider_selections'] - } - - -terraform = Terraform() -del Terraform - - -def emit_tf(config: JSON | None, *, tag_resources: bool = True) -> None: - if config is None: - emit(config) - else: - emit(_transform_tf(config, tag_resources=tag_resources)) - - -def _sanitize_tf(tf_config: CompositeJSON) -> CompositeJSON: - """ - Avoid errors like - - Error: Missing block label - - on api_gateway.tf.json line 12: - 12: "resource": [] - - At least one object property is required, whose name represents the resource - block's type. - """ - if isinstance(tf_config, Mapping): - return {k: v for k, v in tf_config.items() if v} - elif isinstance(tf_config, Sequence): - return [v for v in tf_config if v] - else: - assert False, type(tf_config) - - -def _normalize_tf(tf_config: CompositeJSON) -> Iterable[tuple[str, JSON]]: - """ - Certain levels of a Terraform JSON structure can either be a single - dictionary or a list of dictionaries. For example, these are equivalent: - - {"resource": {"": {"": {"foo": ...}}}} - {"resource": [{"": {"": {"foo": ...}}}]} - - So are these: - - {"resource": {"": {"": {"foo": ...}, "": {"bar": ...}}}} - {"resource": {"": [{"": {"foo": ...}}, {"": {"bar": ...}}]}} - - This function normalizes input to prefer the second form of both cases to - make parsing Terraform configuration simpler. It returns an iterator of the - dictionary entries in the argument, regardless which form is used. - - >>> def n(c): - ... 
return list(_normalize_tf(c)) - - >>> n({}) - [] - - A Singleton dict: - - >>> n({'t': {'r':{}}}) - [('t', {'r': {}})] - - A singleton list of a singleton dict: - - >>> n([{'t': {'r': {}}}]) - [('t', {'r': {}})] - - A two-entry dict: - - >>> n({"t1": {"r1": {}}, "t2": {"r2" :{}}}) - [('t1', {'r1': {}}), ('t2', {'r2': {}})] - - A two-entry list of singleton dicts: - - >>> n([{"t1": {"r1": {}}}, {"t2": {"r2": {}}}]) - [('t1', {'r1': {}}), ('t2', {'r2': {}})] - - A singleton list of a two-entry dict: - - >>> n([{"t1": {"r1": {}}, "t2": {"r2": {}}}]) - [('t1', {'r1': {}}), ('t2', {'r2': {}})] - - A two-entry list of two-entry dicts: - - >>> n([{"t1": {"r1": {}}, "t2": {"r2": {}}}, {"t1": {"r3": {}}, "t2": {"r4": {}}}]) - [('t1', {'r1': {}}), ('t2', {'r2': {}}), ('t1', {'r3': {}}), ('t2', {'r4': {}})] - """ - if isinstance(tf_config, Mapping): - return json_item_mappings(tf_config) - elif isinstance(tf_config, Sequence): - return chain.from_iterable(map(json_item_mappings, tf_config)) - else: - assert False, type(tf_config) - - -def _transform_tf(tf_config: JSON, *, tag_resources: bool = True) -> JSON: - """ - Add tags to all taggable resources and change the `name` tag to `Name` - for tagged AWS resources. - """ - taggable_types = terraform.taggable_resource_types() if tag_resources else {} - return json_mapping(_sanitize_tf({ - block_name: _sanitize_tf([ - _sanitize_tf({ - resource_type: _sanitize_tf([ - { - resource_name: { - **resource, - **( - _tagged_resource(resource_type, resource_name, resource) - if block_name == 'resource' and resource_type in taggable_types else - {} - ) - } - } - for resource_name, resource in _normalize_tf(json_composite(resources)) - ]) - }) - for resource_type, resources in _normalize_tf(json_composite(block)) - ]) - if block_name in {'data', 'resource'} else - block - for block_name, block in tf_config.items() - })) - - -def _tagged_resource(resource_type: str, resource_name: str, resource: JSON) -> JSON: - tags = json_mapping(resource.get('tags', {})) - return { - 'tags': _tags(resource_type, resource_name, tags) - } - - -def _tags(resource_type: str, resource_name: str, tags: JSON) -> JSON: - """ - Return tags named for cloud resources based on :class:`azul.Config`. - - :param resource_type: The Terraform resource type - - :param resource_name: The Terraform name of the resource - - :param tags: Additional tags that override the defaults - - >>> from azul.doctests import assert_json - >>> from test.azul_test_case import patch_config - - >>> with patch_config('terraform_component', 'foo'): - ... assert_json(_tags('aws_instance', 'service', {})) - ... #doctest: +ELLIPSIS - { - "billing": "...", - "service": "azul", - "deployment": "...", - "owner": "...", - "Name": "azul-...", - "component": "azul-service", - "terraform_component": "foo" - } - - >>> with patch_config('terraform_component', None): - ... assert_json(_tags('aws_instance', 'service', {'billing' : 'foo'})) - ... 
#doctest: +ELLIPSIS - { - "billing": "foo", - "service": "azul", - "deployment": "...", - "owner": "...", - "Name": "azul-service-...", - "component": "azul-service" - } - """ - component = f'{config.resource_prefix}-{resource_name}' - tags = { - 'billing': config.billing, - 'service': config.resource_prefix, - 'deployment': config.deployment_stage, - 'owner': config.owner, - **( - { - 'name': component, - 'component': component, - 'terraform_component': config.terraform_component - } - if config.terraform_component else - { - 'name': config.qualified_resource_name(resource_name), - 'component': component - } - ), - **tags - } - return { - 'Name' if k == 'name' and resource_type.startswith('aws_') else k: v - for k, v in tags.items() - } - - -def provider_fragment(region: str) -> JSON: - """ - Return a fragment of Terraform configuration JSON that specifies a - resource's provider. Empty JSON will be returned if the resource's region - is the same as the default region. - A non-default region must first be configured by adding a matching provider - for that region in `providers.tf.json`. - """ - if region == config.region: - return {} - else: - return {'provider': f'aws.{region}'} - - -def block_public_s3_bucket_access(tf_config: JSON) -> JSON: - """ - Return a shallow copy of the given TerraForm configuration embellished with - an aws_s3_bucket_public_access_block resource for each of the aws_s3_bucket - resources in the argument. This is a convenient way to block public access - to every bucket in a given Terraform configuration. The argument is not - modified but the return value may share parts of the argument. - """ - tf_config = copy_json(tf_config, 'resource') - resources = json_dict(tf_config['resource']) - bucket_resources = json_dict(resources['aws_s3_bucket']) - resources['aws_s3_bucket_public_access_block'] = { - resource_name: { - **( - {'provider': resource['provider']} - if 'provider' in resource else - {} - ), - 'bucket': '${aws_s3_bucket.%s.id}' % resource_name, - 'block_public_acls': True, - 'block_public_policy': True, - 'ignore_public_acls': True, - 'restrict_public_buckets': True - } for resource_name, resource in json_item_dicts(bucket_resources) - } - return tf_config - - -def enable_s3_bucket_inventory(tf_config: JSON, - dest_bucket_ref: str = 'data.aws_s3_bucket.logs', - /, - ) -> JSON: - tf_config = copy_json(tf_config, 'resource') - resources = json_dict(tf_config['resource']) - bucket_resources = json_dict(resources['aws_s3_bucket']) - resources['aws_s3_bucket_inventory'] = { - resource_name: { - **( - {'provider': resource['provider']} - if 'provider' in resource else - {} - ), - 'bucket': '${aws_s3_bucket.%s.id}' % resource_name, - 'name': config.qualified_resource_name('inventory'), - 'included_object_versions': 'All', - 'destination': { - 'bucket': { - 'format': 'CSV', - 'bucket_arn': '${%s.arn}' % dest_bucket_ref, - 'prefix': 'inventory' - } - }, - 'schedule': { - 'frequency': 'Daily' - }, - 'optional_fields': [ - 'Size', - 'LastModifiedDate', - 'StorageClass', - 'ETag', - 'IsMultipartUploaded', - 'ReplicationStatus', - 'EncryptionStatus', - 'ChecksumAlgorithm', - 'BucketKeyStatus', - 'IntelligentTieringAccessTier', - 'ObjectLockMode', - 'ObjectLockRetainUntilDate', - 'ObjectLockLegalHoldStatus' - ] - } for resource_name, resource in json_item_dicts(bucket_resources) - } - return tf_config - - -def set_empty_s3_bucket_lifecycle_config(tf_config: JSON) -> JSON: - """ - Return a shallow copy of the given TerraForm configuration embellished with - an 
`aws_s3_bucket_lifecycle_configuration` resource for each of the - `aws_s3_bucket` resources in the argument that lack an explicit lifecycle - configuration. The argument is not modified but the return value may share - parts of the argument. - """ - tf_config = copy_json(tf_config, 'resource') - resources = json_dict(tf_config['resource']) - lifecycles = resources.get('aws_s3_bucket_lifecycle_configuration', {}) - explicit = { - json_str(lifecycle_config['bucket']).split('.')[1] - for _, lifecycle_config in json_item_dicts(lifecycles) - } - buckets = resources.get('aws_s3_bucket', {}) - for resource_name, bucket in json_item_dicts(buckets): - if resource_name not in explicit: - # We can't create a completely empty policy, but a disabled policy - # achieves the goal of preventing/removing policies that originate - # from outside TF. - bucket.setdefault('lifecycle_rule', { - 'id': config.qualified_resource_name('dummy'), - 'enabled': False, - 'expiration': {'days': 36500} - }) - return tf_config - - -class Chalice: - - def private_api_stage_config(self, app_name: str) -> JSON: - """ - Returns the stage-specific fragment of Chalice configuration JSON that - configures the Lambda function to be invoked by a private API Gateway, - if enabled. - """ - return { - 'api_gateway_endpoint_type': 'PRIVATE', - 'api_gateway_endpoint_vpce': ['${aws_vpc_endpoint.%s.id}' % app_name] - } if config.private_api else { - } - - def vpc_lambda_config(self, app_name: str) -> JSON: - """ - Returns the Lambda-specific fragment of Chalice configuration JSON that - configures the Lambda function to connect to the VPC. - """ - return { - 'subnet_ids': [ - '${data.aws_subnet.gitlab_%s_%s.id}' % (vpc.subnet_name(public=False), zone) - for zone in range(vpc.num_zones) - ], - 'security_group_ids': ['${aws_security_group.%s.id}' % app_name], - } - - def vpc_lambda_iam_policy(self, for_tf: bool = False) -> JSONs: - """ - Returns the fragment of IAM policy JSON needed for placing a Lambda - function into a VPC. - """ - actions = [ - 'ec2:CreateNetworkInterface', - 'ec2:DescribeNetworkInterfaces', - 'ec2:DeleteNetworkInterface', - ] - return [ - { - 'actions': actions, - 'resources': ['*'], - } if for_tf else { - 'Effect': 'Allow', - 'Action': actions, - 'Resource': ['*'] - } - ] - - def package_dir_path(self, app_name) -> Path: - root = Path(config.project_root) - return root / 'lambdas' / app_name / '.chalice' / 'terraform' - - def package_zip_path(self, app_name) -> Path: - return self.package_dir_path(app_name) / 'deployment.zip' - - def tf_config_path(self, app_name) -> Path: - return self.package_dir_path(app_name) / 'chalice.tf.json' - - def patch_resource_names(self, app_name: str, tf_config: JSON) -> MutableJSON: - """ - Patch the names of local variables, resources and data source in the - given Chalice-generated Terraform config. Definitions and references - will be patched. - - >>> from azul.doctests import assert_json - - >>> assert_json(chalice.patch_resource_names('indexer', { - ... 'locals': { - ... 'foo': '' - ... }, - ... 'data': { - ... 'aws_foo': { - ... 'bar': {} - ... } - ... }, - ... "resource": { - ... "aws_lambda_function": { - ... "indexercachehealth": { # patch - ... "foo": "${data.aws_foo.bar}${md5(local.foo)}" - ... } - ... }, - ... "aws_cloudwatch_event_rule": { - ... "indexercachehealth-event": { # patch - ... "name": "indexercachehealth-event" # leave - ... } - ... }, - ... "aws_cloudwatch_event_target": { - ... "indexercachehealth-event": { # patch - ... 
"rule": "${aws_cloudwatch_event_rule.indexercachehealth-event.name}", # patch - ... "target_id": "indexercachehealth-event", # leave - ... "arn": "${aws_lambda_function.indexercachehealth.arn}" - ... } - ... }, - ... "aws_lambda_permission": { - ... "indexercachehealth-event": { # patch - ... "function_name": "azul-indexer-prod-indexercachehealth", - ... "source_arn": "${aws_cloudwatch_event_rule.indexercachehealth-event.arn}" # patch - ... } - ... }, - ... "aws_lambda_event_source_mapping": { - ... "contribute-sqs-event-source": { - ... "batch_size": 1 - ... } - ... } - ... } - ... })) - { - "locals": { - "indexer_foo": "" - }, - "data": { - "aws_foo": { - "indexer_bar": {} - } - }, - "resource": { - "aws_lambda_function": { - "indexer_indexercachehealth": { - "foo": "${data.aws_foo.indexer_bar}${md5(local.indexer_foo)}" - } - }, - "aws_cloudwatch_event_rule": { - "indexer_indexercachehealth": { - "name": "indexercachehealth-event" - } - }, - "aws_cloudwatch_event_target": { - "indexer_indexercachehealth": { - "rule": "${aws_cloudwatch_event_rule.indexer_indexercachehealth.name}", - "target_id": "indexercachehealth-event", - "arn": "${aws_lambda_function.indexer_indexercachehealth.arn}" - } - }, - "aws_lambda_permission": { - "indexer_indexercachehealth": { - "function_name": "azul-indexer-prod-indexercachehealth", - "source_arn": "${aws_cloudwatch_event_rule.indexer_indexercachehealth.arn}" - } - }, - "aws_lambda_event_source_mapping": { - "indexer_contribute": { - "batch_size": 1 - } - } - } - } - """ - - renamed = {} - - def rename(block_name, resource_type, old): - # Rename and track the renaming as a side effect - new = self._rename_chalice_resource(app_name, old) - renamed[(block_name, resource_type, old)] = new - return new - - # Translate the definitions - tf_result: MutableJSON = { - block_name: { - resource_type: { - rename(block_name, resource_type, resource_name): copy_json(resource) - for resource_name, resource in json_item_mappings(resources) - } - for resource_type, resources in json_item_mappings(block) - } - if block_name in ('resource', 'data') else - { - rename(block_name, None, name): copy_any_json(value) - for name, value in json_mapping(block).items() - } - if block_name == 'locals' else - copy_any_json(block) - for block_name, block in tf_config.items() - } - - def ref(block_name: str, resource_type: str, name: str) -> str: - if block_name == 'resource': - return '.'.join([resource_type, name]) - elif block_name == 'locals': - return '.'.join(['local', name]) - else: - return '.'.join([block_name, resource_type, name]) - - ref_map = { - ref(block_name, resource_type, name): ref(block_name, resource_type, new_name) - for (block_name, resource_type, name), new_name in renamed.items() - } - assert len(ref_map) == len(renamed) - # Sort in reverse so that keys that are prefixes of other keys go last - rev_ref_map = sorted(ref_map.items(), reverse=True) - - def patch_refs(v: AnyMutableJSON) -> AnyMutableJSON: - if isinstance(v, dict): - return {k: patch_refs(v) for k, v in json_dict(v).items()} - elif isinstance(v, list): - return list(map(patch_refs, v)) - elif isinstance(v, str): - for old_ref, new_ref in rev_ref_map: - v = v.replace(old_ref, new_ref) - return v - else: - return v - - return json_dict(patch_refs(tf_result)) - - def rename_chalice_resource_in_tf_state(self, reference: str) -> str: - """ - Translate the resource and data references found Terraform state that - resulted from applying Terraform configuration generated by Chalice. 
- The configuration is assumed to have been applied as a module, which - is how we used to incorporate the Chalice-generated TF config into our - own. The returned references omit the module prefix and instead - disambiguate between indexer and service lambda directly in the - resource name, eliminating the need to apply the config as a module. - - >>> f = chalice.rename_chalice_resource_in_tf_state - - >>> f('module.chalice_indexer.aws_foo.rest_api') - 'aws_foo.indexer' - - >>> f('module.chalice_indexer.aws_foo.api_handler') - 'aws_foo.indexer' - - >>> f('module.chalice_indexer.aws_foo.rest_api_invoke') - 'aws_foo.indexer' - - >>> f('module.chalice_indexer.data.aws_foo.chalice') - 'data.aws_foo.indexer' - - >>> f('module.chalice_indexer.aws_foo.aggregate-sqs-event-source') - 'aws_foo.indexer_aggregate' - """ - prefix, module, *reference = reference.split('.') - assert prefix == 'module', prefix - prefix, module = module.split('_') - assert prefix == 'chalice' - return self.rename_chalice_resource(module, reference) - - def rename_chalice_resource(self, app_name: str, reference: list[str]) -> str: - """ - Translate the resource and data references found in Terraform - configuration generated by Chalice. - - :param reference: the reference to translate - - :param app_name: the name of the Lambda function to which the resource - belongs. - """ - assert app_name in ('service', 'indexer'), app_name - *reference, resource_type, resource_name = reference - if reference: - assert reference == ['data'] - resource_name = self._rename_chalice_resource(app_name, resource_name) - return '.'.join([*reference, resource_type, resource_name]) - - def _rename_chalice_resource(self, app_name: str, resource_name: str) -> str: - singletons = { - 'rest_api', - 'api_handler', - 'rest_api_invoke', - 'chalice', - 'chalice_api_swagger' - } - if resource_name in singletons: - resource_name = app_name - else: - resource_name = resource_name.removesuffix('-sqs-event-source') - resource_name = resource_name.removesuffix('-event') - resource_name = app_name + '_' + resource_name - return resource_name - - @cache - def tf_config(self, app_name): - with open(self.tf_config_path(app_name)) as f: - tf_config = json.load(f) - tf_config = self.patch_resource_names(app_name, tf_config) - resources = json_dict(tf_config['resource']) - data = json_dict(tf_config['data']) - locals = json_dict(tf_config['locals']) - - # null_data_source has been deprecated and locals should be used instead. - # However, the data sources defined underneath it aren't actually used - # anywhere so we can just delete the entry. - del data['null_data_source'] - - if config.private_api: - # Hack to inject the VPC endpoint IDs that Chalice doesn't (but should) - # add when the `api_gateway_endpoint_vpce` config is used. 
- rest_apis = json_dict(resources['aws_api_gateway_rest_api']) - rest_api = json_dict(rest_apis[app_name]) - json_dict(rest_api['endpoint_configuration'])['vpc_endpoint_ids'] = [ - '${aws_vpc_endpoint.%s.id}' % app_name - ] - - functions = json_item_dicts(json_dict(resources['aws_lambda_function'])) - for _, resource in functions: - assert 'layers' not in resource - resource['layers'] = ['${aws_lambda_layer_version.dependencies.arn}'] - env = config.es_endpoint_env( - es_endpoint=( - aws.es_endpoint - if config.share_es_domain else - '${aws_opensearch_domain.index.endpoint}:443' - ), - es_instance_count=( - not_none(aws.es_instance_count) - if config.share_es_domain else - '${aws_opensearch_domain.index.cluster_config[0].instance_count}' - ) - ) - json_dict(json_dict(resource['environment'])['variables']).update(env) - package_zip = str(self.package_zip_path(app_name)) - resource['source_code_hash'] = '${filebase64sha256("%s")}' % package_zip - resource['filename'] = package_zip - - assert 'aws_cloudwatch_log_group' not in resources - functions = json_item_dicts(resources['aws_lambda_function']) - resources['aws_cloudwatch_log_group'] = { - f'{resource_name}_lambda': { - 'name': f'/aws/lambda/{resource['function_name']}', - 'retention_in_days': config.audit_log_retention_days - } - for resource_name, resource in functions - } - - for resource_type, argument in [ - ('aws_cloudwatch_event_rule', 'name'), - ('aws_cloudwatch_event_target', 'target_id') - ]: - # Currently, Chalice fails to prefix the names of some resources. We - # need them to be prefixed with `azul-` to allow for limiting the - # scope of certain IAM permissions for Gitlab and, more importantly, - # the deployment stage so these resources are segregated by deployment. - for _, resource in json_item_dicts(resources[resource_type]): - function_name, _, suffix = json_str(resource[argument]).partition('-') - assert suffix == 'event', suffix - assert function_name, function_name - resource[argument] = config.qualified_resource_name(function_name) - - # Chalice-generated S3 bucket notifications include the bucket name in - # the resource name, resulting in an invalid resource name when the - # bucket name contains periods. Bucket names cannot include underscores - # (https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html), - # so replacing the periods with underscores results in valid resource - # names while retaining the correlation with bucket names. - try: - bucket_notifications = resources['aws_s3_bucket_notification'] - except KeyError: - pass - else: - resources['aws_s3_bucket_notification'] = { - key.replace('.', '_'): value - for key, value in json_item_dicts(bucket_notifications) - } - # To prevent a race condition by Terraform, we make the bucket - # notifications depend on the related aws_lambda_permission. 
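-        # The resulting entries would look like, e.g.,
-        #
-        #     'depends_on': ['aws_lambda_permission.indexer_indexercachehealth']
-        #
-        # where the permission name is merely illustrative.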
- permissions_by_function = defaultdict(set) - permissions = resources['aws_lambda_permission'] - for permission_name, permission in json_item_dicts(permissions): - function_ref = permission['function_name'] - permissions_by_function[function_ref].add(permission_name) - for _, notification in json_item_dicts(resources['aws_s3_bucket_notification']): - assert 'depends_on' not in notification, notification - notification['depends_on'] = [ - f'aws_lambda_permission.{permission_name}' - for function in json_element_dicts(notification['lambda_function']) - for permission_name in permissions_by_function[function['lambda_function_arn']] - ] - - # The fix for https://github.com/aws/chalice/issues/1237 introduced the - # create_before_destroy hack and it may have helped but has far-ranging - # implications such as pushing create-before-destroy semantics upstream - # to the dependencies. - # - # This is what caused https://github.com/DataBiosphere/azul/issues/4752 - # - # Managing the stage as an explicit resource as per TF recommendation - # - # https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_deployment - # - # and using the new `replace_triggered_by` lifecycle property introduced - # in TF 1.2 to propagate the replacement downstream is a more intuitive - # and less intrusive fix. - # - deployments = json_dict(resources['aws_api_gateway_deployment']) - deployment = json_dict(deployments[app_name]) - stage_name = deployment.pop('stage_name') - require(stage_name == config.deployment_stage, - 'The TF config from Chalice does not match the selected deployment', - stage_name, config.deployment_stage) - del json_dict(deployment['lifecycle'])['create_before_destroy'] - assert not deployment['lifecycle'], deployment - del deployment['lifecycle'] - deployment['triggers'] = {'redeployment': deployment.pop('stage_description')} - - # Using Terraform to specify the REST API minimum compression size - # proved to be problematic as it would first make an UpdateRestApi call - # to set the property, followed by a PutRestApi call with mode=overwrite - # which would reset the property back to its default value (disabled). - # Setting this property using AWS API Gateway extensions to the OpenAPI - # specification works around this issue. - # - # We ran into similar difficulties when using Terraform to configure - # default responses for the API, so we use these extensions for that - # purpose, too. - # - openapi_spec = json.loads(json_str(locals[app_name])) - rest_apis = json_dict(resources['aws_api_gateway_rest_api']) - rest_api = json_dict(rest_apis[app_name]) - assert 'minimum_compression_size' not in rest_api, rest_api - key = 'x-amazon-apigateway-minimum-compression-size' - openapi_spec[key] = config.minimum_compression_size - - # When mapping a static value to a response parameter, the value - # must be enclosed within a pair of single quotes. Note that - # azul.strings.single_quote() is not used here since API Gateway allows - # internal single quotes, which that function would prohibit. 
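-        # For example, a static header value of max-age=63072000 would be
-        # mapped as 'max-age=63072000'.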
- # - # https://docs.aws.amazon.com/apigateway/latest/developerguide/request-response-data-mappings.html#mapping-response-parameters - # - security_headers = { - f'gatewayresponse.header.{k}': f"'{v}'" - for k, v in AzulChaliceApp.security_headers().items() - } - assert 'aws_api_gateway_gateway_response' not in resources, resources - openapi_spec['x-amazon-apigateway-gateway-responses'] = ( - { - f'DEFAULT_{response_type}': { - 'responseParameters': security_headers - } for response_type in ['4XX', '5XX'] - } | { - response_type: { - 'responseParameters': { - **security_headers, - 'gatewayresponse.header.Retry-After': "'10'" - }, - 'responseTemplates': { - "application/json": json.dumps({ - 'message': '504 Gateway Timeout. Wait the number of ' - 'seconds specified in the `Retry-After` ' - 'header before retrying the request.' - }) - } - } for response_type in ['INTEGRATION_TIMEOUT', 'INTEGRATION_FAILURE'] - } - ) - locals[app_name] = json.dumps(openapi_spec) - - # Replace the hard-coded ARN emitted by Chalice with a resource - # reference so that the event source (the queue) is created before the - # event source mapping depending on it. - # - if app_name == 'indexer': - event_source_mappings = resources['aws_lambda_event_source_mapping'] - for _, resource in json_item_dicts(event_source_mappings): - _, _, resource_name = json_str(resource['event_source_arn']).rpartition(':') - suffix = '.fifo' if resource_name.endswith('.fifo') else '' - sqs_name, _ = config.unqualified_resource_name(resource_name, suffix) - resource['event_source_arn'] = f'${{aws_sqs_queue.{sqs_name}.arn}}' - - return { - 'resource': resources, - 'data': data, - 'locals': locals - } - - -chalice = Chalice() - - -class VPC: - num_zones = 2 # An ALB needs at least two availability zones - - # These are TF resource names, the real-world resource names are fixed by AWS. - default_vpc_name = 'default' - default_security_group_name = 'default' - - @classmethod - def subnet_name(cls, public: bool) -> str: - return 'public' if public else 'private' - - @classmethod - def subnet_number(cls, zone: int, public: bool) -> int: - # Returns even numbers for private subnets, odd numbers for public - # subnets. The advantage of this numbering scheme is that it won't be - # perturbed by adding zones. 
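-        # For example, zone 0 yields subnet numbers 0 (private) and 1
-        # (public), and zone 1 yields 2 (private) and 3 (public).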
- return 2 * zone + int(public) - - @classmethod - def security_rule(cls, **rule): - return { - 'cidr_blocks': None, - 'ipv6_cidr_blocks': None, - 'prefix_list_ids': None, - 'from_port': None, - 'protocol': None, - 'security_groups': None, - 'self': None, - 'to_port': None, - 'description': None, - **rule - } - - -vpc = VPC() -del VPC diff --git a/src/azul/threads.py b/src/azul/threads.py deleted file mode 100644 index 4dc89a4a37..0000000000 --- a/src/azul/threads.py +++ /dev/null @@ -1,258 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from collections.abc import ( - Iterable, -) -from concurrent.futures import ( - Future, - ThreadPoolExecutor, - as_completed, -) -import logging -import threading -import time - -from azul import ( - require, -) - -log = logging.getLogger(__name__) - - -class Latch: - """ - >>> l = Latch(1) - >>> l.decrement(1) # opens the latch - >>> l.decrement(1) # latch already open, but decrements value - >>> l.value - -1 - - >>> l = Latch(0) # latch is initially open - >>> l.decrement(1) - >>> l.value - -1 - - >>> l = Latch(0) - >>> l.decrement(0) - >>> l.value - 0 - - - >>> l = Latch(value=0) - >>> l.decrement(0, timeout=0.01) - >>> l.value - 0 - - >>> l = Latch(value=2) - >>> l.decrement(1, timeout=0.01) # not enough to open latch, so time out - Traceback (most recent call last): - ... - TimeoutError - >>> l.decrement(1) # opens latch - >>> l.value - 0 - - >>> from concurrent.futures import ThreadPoolExecutor - >>> n = 2 - >>> with ThreadPoolExecutor(max_workers=n) as tpe: - ... l = Latch(n) - ... fs = [tpe.submit(l.decrement, 1) for i in range(n)] - >>> list(map(Future.result, fs)) - [None, None] - - >>> with ThreadPoolExecutor(max_workers=n) as tpe: - ... l = Latch(n+1) - ... fs = [tpe.submit(l.decrement, 1, timeout=1) for i in range(n)] - >>> list(map(Future.result, fs)) - Traceback (most recent call last): - ... - TimeoutError - """ - - def __init__(self, value): - require(isinstance(value, int)) - self.value = value - self.condition = threading.Condition() - - def decrement(self, value, *, timeout=None): - require(isinstance(value, int)) - self.condition.acquire() - try: - self.value -= value - if self.value > 0: - while True: - if self.condition.wait(timeout=timeout): - if self.value <= 0: - break - else: - raise TimeoutError - else: - self.condition.notify_all() - finally: - self.condition.release() - - -class DeferredTaskExecutor(metaclass=ABCMeta): - """ - A wrapper around ThreadPoolExecutor that allows for conveniently deferring - method calls to be performed concurrently, optionally after other deferred - method calls have completed and/or a given amount of time has passed. - - >>> class MyExecutor(DeferredTaskExecutor): - ... - ... def __init__(self) -> None: - ... super().__init__(num_workers=2) - ... self.delta = None - ... self.a, self.b, self.c, self.d = None, None, None, None - ... - ... def _run(self): - ... foo = self._defer(self.set, time.time(), 1, b=2, delay=1.23) - ... self._defer(self.sum, run_after=[foo]) - ... - ... def set(self, start, a, b=None): - ... self.delta = time.time() - start - ... self._defer(self.never, run_after=[self._defer(self.err)]) - ... self.a = a - ... self.b = b - ... - ... def sum(self): - ... self.c = self.a + self.b - ... - ... def err(self): - ... raise ValueError(123) - ... - ... def never(self): - ... self.d = 1 - - >>> from logging import Logger - >>> import unittest.mock - >>> with unittest.mock.patch.object(Logger, 'warning') as mock_warning: - ... e = MyExecutor() - ... 
e.run() # err() raises an exception, and emits a warning log - [ValueError(123)] - - >>> mock_warning.mock_calls - [call('Exception in deferred callable', exc_info=True)] - - >>> 1.23 <= e.delta < 2 # set() runs after the given delay, but not much later - True - - >>> e.a, e.b, e.c, e.d # sum() runs after set(), and never() does not run at all - (1, 2, 3, None) - """ - - @abstractmethod - def _run(self) -> None: - """ - Subclasses override this method for the top-level task they'd like to be performed. - - This method typically calls _defer() at least once. - """ - raise NotImplementedError - - def __init__(self, num_workers: int | None = None) -> None: - super().__init__() - self.tpe = ThreadPoolExecutor(max_workers=num_workers) - self.futures: set[Future] = set() - - def run(self) -> list[BaseException]: - """ - Clients call this method to initiate the top-level task. - - :return: A list of the exceptions that occurred in deferred methods. - """ - with self.tpe: - self._run() - return self._collect_futures() - - def _defer(self, - callable_, - *args, - run_after: Iterable[Future] | None = None, - start_time: float | None = None, - delay: float | None = None, - **kwargs) -> Future: - """ - Invoke the given callable (typically a method of this class or a function nested in a method) with the given - arguments and after the preconditions are met. - - :param callable_: the callable to invoke - - :param args: the positional arguments to pass to the callable - - :param kwargs: the keyword arguments to pass to the callable - - :param run_after: the futures representing other callables that must complete successfully before - this callable is invoked - - :param start_time: an optional absolute point in time (as returned by time.time()) - before which that task will not be invoked, defaults to now - - :param delay: an optional number of seconds that will be added to start_time - - :return: a Future instance representing the callable - """ - if start_time is None: - if delay is not None: - start_time = time.time() + delay - else: - if delay is not None: - start_time = start_time + delay - - def run_if_possible(): - can_run = self._check_run_after(run_after) if run_after else True - if can_run is False: - raise self.UnsatisfiedDependency - elif can_run is True and (start_time is None or start_time < time.time()): - return callable_(*args, **kwargs) - else: - return self._defer(callable_, *args, run_after=run_after, start_time=start_time, **kwargs) - - def log_exceptions_early(future): - e = future.exception() - if e is not None and not isinstance(e, self.UnsatisfiedDependency): - log.warning('Exception in deferred callable', exc_info=True) - - future = self.tpe.submit(run_if_possible) - future.add_done_callback(log_exceptions_early) - self.futures.add(future) - return future - - class UnsatisfiedDependency(RuntimeError): - pass - - def _check_run_after(self, run_after: Iterable[Future]) -> bool | None: - for future in run_after: - while True: - if future.done(): - if future.exception(): - return False # at least one future failed - else: - # Tasks that call _defer() will return a future which needs to be examined recursively. - # This tail recursion could be of arbitrary depth. 
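-                        # For example, a deferred task whose start time has
-                        # not been reached yet re-defers itself, so its future
-                        # resolves to another future, which we then wait on in
-                        # its place.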
- result = future.result() - if isinstance(result, Future): - future = result - else: - break - else: - return None # some futures are not yet done - return True # all futures succeeded - - def _collect_futures(self): - errors = [] - num_secondary_errors = 0 - while self.futures: - for future in as_completed(self.futures): - e = future.exception() - if e is not None: - if isinstance(e, self.UnsatisfiedDependency): - num_secondary_errors += 1 - else: - errors.append(e) - self.futures.remove(future) - # We cannot have any secondary errors without primary ones - assert bool(errors) or not bool(num_secondary_errors) - return errors diff --git a/src/azul/time.py b/src/azul/time.py deleted file mode 100644 index fbc7b292d8..0000000000 --- a/src/azul/time.py +++ /dev/null @@ -1,193 +0,0 @@ -from abc import ( - ABCMeta, - abstractmethod, -) -from datetime import ( - datetime, -) -import email.utils -import re -import time - -from azul import ( - require, -) -from azul.types import ( - LambdaContext, -) - - -class RemainingTime(metaclass=ABCMeta): - """ - A monotonically decreasing, non-negative estimate of time remaining in a - particular context - """ - - @abstractmethod - def get(self) -> float: - """ - Returns the estimated remaining time in seconds - """ - raise NotImplementedError - - -class RemainingLambdaContextTime(RemainingTime): - """ - The estimated running time in an AWS Lambda context - """ - - def __init__(self, context: LambdaContext) -> None: - super().__init__() - self._context = context - - def get(self) -> float: - return self._context.get_remaining_time_in_millis() / 1000 - - -class RemainingTimeUntil(RemainingTime): - """ - The remaining wall clock time up to a given absolute deadline in terms of - time.time() - """ - - def __init__(self, deadline: float) -> None: - super().__init__() - self._deadline = deadline - - def get(self) -> float: - return max(0.0, self._deadline - time.time()) - - -class SpecificRemainingTime(RemainingTimeUntil): - """ - A specific relative amount of wall clock time in seconds - """ - - def __init__(self, amount: float) -> None: - require(amount >= 0, 'Initial remaining time must be non-negative') - super().__init__(time.time() + amount) - - -class AdjustedRemainingTime(RemainingTime): - """ - Some other estimate of remaining time, adjusted by a fixed offset. Use a - negative offset to reduce the remaining time or a positive offset to - increase it. - """ - - def __init__(self, offset: float, actual: RemainingTime) -> None: - super().__init__() - self._offset = offset - self._actual = actual - - def get(self) -> float: - return max(0.0, self._actual.get() + self._offset) - - -def parse_http_date(http_date: str, base_time: float | None = None) -> float: - """ - Convert an HTTP date string to a Python timestamp (UNIX time). - - :param http_date: a string matching https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1 - - :param base_time: the timestamp for converting a relative HTTP date into - Python timestamp, if None, the current time will be used. 
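-
-    :return: the parsed date and time as a Python timestamp (UNIX time)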
- - >>> parse_http_date('123', 0.4) - 123.4 - >>> t = 1541313273.0 - >>> parse_http_date('Sun, 04 Nov 2018 06:34:33 GMT') == t - True - >>> parse_http_date('Sun, 04 Nov 2018 06:34:33 PST') == t + 8 * 60 * 60 - True - """ - if base_time is None: - base_time = time.time() - try: - relative = int(http_date) - except ValueError: - absolute = email.utils.parsedate_to_datetime(http_date) - return absolute.timestamp() - else: - return base_time + float(relative) - - -dcp2_datetime_format = '%Y-%m-%dT%H:%M:%S.%f%z' - - -def format_dcp2_datetime(d: datetime) -> str: - """ - Convert a tz-aware (UTC) datetime into a '2020-01-01T00:00:00.000000Z' - formatted string. - - >>> from datetime import timezone - >>> format_dcp2_datetime(datetime(2020, 12, 31, 23, 59, 59, 1, tzinfo=timezone.utc)) - '2020-12-31T23:59:59.000001Z' - - >>> format_dcp2_datetime(datetime(9999, 1, 1, tzinfo=timezone.utc)) - '9999-01-01T00:00:00.000000Z' - - >>> format_dcp2_datetime(datetime(1, 1, 1, tzinfo=timezone.utc)) - '0001-01-01T00:00:00.000000Z' - - >>> format_dcp2_datetime(datetime(2020, 1, 1)) - Traceback (most recent call last): - ... - azul.RequirementError: 2020-01-01 00:00:00 - """ - require(str(d.tzinfo) == 'UTC', d) - date_string = datetime.strftime(d, dcp2_datetime_format) - # Work around https://bugs.python.org/issue13305 - date_string = ('0000' + date_string)[-31:] - assert date_string.endswith('+0000'), date_string - return date_string[:-5] + 'Z' - - -def parse_dcp2_datetime(s: str) -> datetime: - """ - Convert a '2020-01-01T00:00:00.000000Z' formatted string into a tz-aware - (UTC) datetime. - - >>> parse_dcp2_datetime('2020-01-01T00:00:00.000000Z') - datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) - - >>> parse_dcp2_datetime('0001-01-01T00:00:00.000000Z') - datetime.datetime(1, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) - - >>> parse_dcp2_datetime('2020-01-01T00:00:00.000000') - Traceback (most recent call last): - ... - ValueError: time data '2020-01-01T00:00:00.000000' does not match format '%Y-%m-%dT%H:%M:%S.%f%z' - """ - return datetime.strptime(s, dcp2_datetime_format) - - -def parse_dcp2_version(s: str) -> datetime: - """ - Convert a dcp2 `version` string into a tz-aware (UTC) datetime. - - https://github.com/HumanCellAtlas/dcp2/blob/main/docs/dcp2_system_design.rst#312object-naming - - >>> parse_dcp2_version('2020-01-01T00:00:00.123456Z') - datetime.datetime(2020, 1, 1, 0, 0, 0, 123456, tzinfo=datetime.timezone.utc) - - >>> parse_dcp2_version('2020-01-01t00:00:00.123456Z') - Traceback (most recent call last): - ... - ValueError: Invalid version value '2020-01-01t00:00:00.123456Z' - - >>> parse_dcp2_version('2020-1-01T00:00:00.123456Z') - Traceback (most recent call last): - ... - ValueError: Invalid version value '2020-1-01T00:00:00.123456Z' - - >>> parse_dcp2_version('2020-01-01T00:00:00.12345Z') - Traceback (most recent call last): - ... 
- ValueError: Invalid version value '2020-01-01T00:00:00.12345Z' - """ - pattern = r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z' - if re.fullmatch(pattern, s): - return parse_dcp2_datetime(s) - else: - raise ValueError(f'Invalid version value {s!r}') diff --git a/src/azul/uuids.py b/src/azul/uuids.py deleted file mode 100644 index 2aaf6a381c..0000000000 --- a/src/azul/uuids.py +++ /dev/null @@ -1,270 +0,0 @@ -from hashlib import ( - sha1, -) -import math -from typing import ( - ClassVar, - Self, -) -from uuid import ( - UUID, -) - -import attr - -from azul import ( - reject, - require, -) -from azul.types import ( - JSON, - MutableJSON, - json_int, -) - - -class InvalidUUIDError(Exception): - - def __init__(self, uuid: str, *args): - super().__init__(f'{uuid!r} is not a valid UUID.', *args) - - -class InvalidUUIDVersionError(InvalidUUIDError): - - def __init__(self, uuid: UUID): - super().__init__(str(uuid), f'Not a valid RFC-4122 UUID (undefined version {uuid.version}).') - - -class InvalidUUIDPrefixError(Exception): - - def __init__(self, prefix: str): - super().__init__(f'{prefix!r} is not a valid UUID prefix.') - - -def validate_uuid(uuid_str: str) -> None: - """ - >>> validate_uuid('8f53d355-b2fa-4bab-a2f2-6852d852d2ec') - - >>> validate_uuid('foo') - Traceback (most recent call last): - ... - azul.uuids.InvalidUUIDError: 'foo' is not a valid UUID. - - >>> validate_uuid('8F53d355-b2fa-4bab-a2f2-6852d852d2ec') - Traceback (most recent call last): - ... - azul.uuids.InvalidUUIDError: '8F53d355-b2fa-4bab-a2f2-6852d852d2ec' is not a valid UUID. - - >>> validate_uuid('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa') # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - azul.uuids.InvalidUUIDVersionError: ("'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' is not a valid UUID.", - 'Not a valid RFC-4122 UUID (undefined version 10).') - """ - try: - formatted_uuid = UUID(uuid_str) - except ValueError: - raise InvalidUUIDError(uuid_str) - else: - if str(formatted_uuid) != uuid_str: - raise InvalidUUIDError(uuid_str) - if formatted_uuid.version not in (1, 3, 4, 5): - raise InvalidUUIDVersionError(formatted_uuid) - - -def validate_uuid_prefix(uuid_prefix: str) -> None: - """ - # The empty string is a valid prefix - >>> validate_uuid_prefix('') - - >>> validate_uuid_prefix('8f53') - - # A complete UUID is a valid prefix - >>> validate_uuid_prefix('8f53d355-b2fa-4bab-a2f2-6852d852d2ec') - - >>> validate_uuid_prefix('8F53') - Traceback (most recent call last): - ... - azul.uuids.InvalidUUIDPrefixError: '8F53' is not a valid UUID prefix. - - >>> validate_uuid_prefix('8') - - >>> validate_uuid_prefix('8f538f53') - - >>> validate_uuid_prefix('8f538f5-') - Traceback (most recent call last): - ... - azul.RequirementError: UUID prefix ends with an invalid character: 8f538f5- - - >>> validate_uuid_prefix('8f538f-') - Traceback (most recent call last): - ... - azul.RequirementError: UUID prefix ends with an invalid character: 8f538f- - - >>> validate_uuid_prefix('8f538f53a') - Traceback (most recent call last): - ... - azul.uuids.InvalidUUIDPrefixError: '8f538f53a' is not a valid UUID prefix. 
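-
-    Validation works by padding the given prefix with the corresponding tail
-    of a known-valid UUID and then validating the result as a complete UUID.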
- """ - valid_uuid_str = '26a8fccd-bbd2-4342-9c19-6ed7c9bb9278' - reject(uuid_prefix.endswith('-'), - f'UUID prefix ends with an invalid character: {uuid_prefix}') - try: - validate_uuid(uuid_prefix + valid_uuid_str[len(uuid_prefix):]) - except InvalidUUIDError: - raise InvalidUUIDPrefixError(uuid_prefix) - - -def change_version(uuid: str, old_version: int, new_version: int) -> str: - """ - >>> change_version('d36eb64f-162c-4b8f-bb17-069e2fd2b208', 1, 10) - Traceback (most recent call last): - ... - AssertionError: ('d36eb64f-162c-4b8f-bb17-069e2fd2b208', 4, 1) - >>> change_version('d36eb64f-162c-4b8f-bb17-069e2fd2b208', 4, 10) - 'd36eb64f-162c-ab8f-bb17-069e2fd2b208' - """ - assert 1 <= new_version < 16, new_version - if old_version in (1, 3, 4, 5): - validate_uuid(uuid) - prefix, version, suffix = uuid[:14], uuid[14], uuid[15:] - version = int(version, 16) - assert version == old_version, (uuid, version, old_version) - uuid = f'{prefix}{new_version:x}{suffix}' - assert UUID(uuid).version == new_version, (uuid, old_version) - if new_version in (1, 3, 4, 5): - validate_uuid(uuid) - return uuid - - -class UUIDPartitionMeta(type): - - def __init__(cls, *args, **kwargs): - super().__init__(*args, **kwargs) - attr.s(frozen=True, kw_only=True, auto_attribs=True)(cls) - cls.root = cls(prefix_length=0, prefix=0) - - -class UUIDPartition(metaclass=UUIDPartitionMeta): - """ - A binary partitioning of the UUID space. Most partitionings of the UUID - space use a prefix of the hexadecimal representation of UUIDs. This class - uses the binary representation and is therefore more granular. - """ - prefix_length: int - prefix: int - - root: ClassVar[Self] # see metaclass above - - # This stub is only needed to aid PyCharm's type inference. Without this, - # a constructor invocation that doesn't refer to the class explicitly, but - # through a variable will cause a warning. I suspect a bug in PyCharm: - # - # https://youtrack.jetbrains.com/issue/PY-44728 - # - # noinspection PyDataclass - def __init__(self, *, prefix_length: int, prefix: int) -> None: ... - - def __attrs_post_init__(self): - reject(self.prefix_length == 0 and self.prefix != 0) - require(0 <= self.prefix < 2 ** self.prefix_length) - - @classmethod - def from_json(cls, json: JSON) -> Self: - return cls(prefix_length=json_int(json['prefix_length']), - prefix=json_int(json['prefix'])) - - def to_json(self) -> MutableJSON: - return { - 'prefix_length': self.prefix_length, - 'prefix': self.prefix - } - - def contains(self, member: UUID) -> bool: - """ - >>> p = UUIDPartition(prefix_length=7, prefix=0b0111_1111) - >>> p.contains(UUID('fdd4524e-14c4-41d7-9071-6cadab09d75c')) - False - >>> p.contains(UUID('fed4524e-14c4-41d7-9071-6cadab09d75c')) - True - >>> p.contains(UUID('ffd4524e-14c4-41d7-9071-6cadab09d75c')) - True - """ - # UUIDs are 128 bit integers - shift = 128 - self.prefix_length - return member.int >> shift == self.prefix - - def divide(self, num_divisions: int) -> list[Self]: - """ - Divide this partition into a set of at least the given number of - sub-partitions. The length of the return value will always be the - smallest a power of two that is greater than ``num_divisions`. - - >>> sorted(UUIDPartition.root.divide(3)) - ... 
# doctest: +NORMALIZE_WHITESPACE - [UUIDPartition(prefix_length=2, prefix=0),\ - UUIDPartition(prefix_length=2, prefix=1),\ - UUIDPartition(prefix_length=2, prefix=2),\ - UUIDPartition(prefix_length=2, prefix=3)] - """ - prefix_length = math.ceil(math.log2(num_divisions)) - num_divisions = 2 ** prefix_length - cls = type(self) - return [ - cls(prefix_length=self.prefix_length + prefix_length, - prefix=(self.prefix << prefix_length) + prefix) - for prefix in range(num_divisions) - ] - - def __str__(self) -> str: - """ - Represent this partition as a hexadecimal range. This range can be used - to visually tell wether this partition contains a particular UUID: it - does, if the UUID starts with any hexadecimal sequence in the range - returned by this function. - - >>> str(UUIDPartition.root) - '-' - - 0b1111_1110 == 0xfe - 0b1111_1111 == 0xff - >>> str(UUIDPartition(prefix_length=7, prefix=0b1111_111)) - 'fe-ff' - - Leading zeroes in the high and low end of the range: - - 0b0000_1110 == 0x0e - 0b0000_1111 == 0x0f - >>> str(UUIDPartition(prefix_length=7, prefix=0b0000_111)) - '0e-0f' - - A partition twice as big (a binary prefix that's one bit shorter): - - 0b0000_1100 = 0x0c - 0b0000_1101 = 0x0d - 0b0000_1110 = 0x0e - 0b0000_1111 = 0x0f - >>> str(UUIDPartition(prefix_length=6, prefix=0b0000_11)) - '0c-0f' - """ - shift = 4 - self.prefix_length % 4 # shift to align at nibble boundary - all_ones = (1 << shift) - 1 - lo = self.prefix << shift - hi = lo + all_ones - - hex_len = (self.prefix_length + 3) // 4 - - def hex(i): - return format(i, f'0{hex_len}x')[:hex_len] - - return '-'.join(map(hex, (lo, hi))) - - -def uuid5_for_bytes(namespace: UUID, name: bytes) -> UUID: - """ - Generate a UUID from the SHA-1 hash of a namespace UUID and a name. Same as - uuid.uuid5 but takes `bytes` not `str`, and thereby avoids assuming an - encoding (uuid.uuid5 assumes UTF-8). - """ - hash = sha1(namespace.bytes + name).digest() - return UUID(bytes=hash[:16], version=5) diff --git a/src/azul/vendored/README.md b/src/azul/vendored/README.md deleted file mode 100644 index f3a5c370c9..0000000000 --- a/src/azul/vendored/README.md +++ /dev/null @@ -1,11 +0,0 @@ -This directory contains vendored distributions. - -Please link each vendored distribution along with a link to the tree at the -exact commit when the tree was copied. - -* [frozendict](https://github.com/slezica/python-frozendict/tree/7e078bf084ee734367dde8db2c8a2f00ec37375f) - -Place the vendored distributions' license into LICENSE at the project root. - -Try to maintain the package path. 
If the unvendored distribution introduces the -package `foo.bar`, the vendored package should be at `azul.vendored.foo.bar` diff --git a/src/azul/vendored/__init__.py b/src/azul/vendored/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/humancellatlas/data/metadata/age_range.py b/src/humancellatlas/data/metadata/age_range.py deleted file mode 100644 index 8418f59738..0000000000 --- a/src/humancellatlas/data/metadata/age_range.py +++ /dev/null @@ -1,113 +0,0 @@ -from dataclasses import ( - dataclass, -) -from typing import ( - Self, -) - - -@dataclass(frozen=True) -class AgeRange: - """ - >>> AgeRange.parse(' 1 - 2 ', 'second') - AgeRange(min=1.0, max=2.0) - - >>> AgeRange.parse(' - ', 'second') - AgeRange(min=0.0, max=inf) - - >>> AgeRange.parse('', 'years') - AgeRange(min=0.0, max=inf) - - >>> r = AgeRange.parse('0-1', 'year'); r - AgeRange(min=0.0, max=31536000.0) - >>> 365 * 24 * 60 * 60 == r.max - True - - >>> AgeRange.parse('1-', 'seconds') - AgeRange(min=1.0, max=inf) - - >>> AgeRange.parse('-2', 'seconds') - AgeRange(min=0.0, max=2.0) - - >>> AgeRange.parse('', 'blink') - Traceback (most recent call last): - ... - ValueError: Cannot convert age '' with unit 'blink' to an AgeRange object - - >>> AgeRange.parse(' 1 - 2 ', 'blinks') - Traceback (most recent call last): - ... - ValueError: Cannot convert age ' 1 - 2 ' with unit 'blinks' to an AgeRange object - - >>> AgeRange.parse('1-2-3', 'hours') - Traceback (most recent call last): - ... - ValueError: Cannot convert age '1-2-3' with unit 'hours' to an AgeRange object - - >>> AgeRange.parse('one-2', 'days') - Traceback (most recent call last): - ... - ValueError: Cannot convert age 'one-2' with unit 'days' to an AgeRange object - - >>> AgeRange(0, 1.0) - Traceback (most recent call last): - ... - TypeError: ('Constructor arguments must be float values', 0) - - >>> AgeRange(0.0, 1) - Traceback (most recent call last): - ... 
-    TypeError: ('Constructor arguments must be float values', 1)
-    """
-    min: float
-    max: float
-
-    FACTORS = dict(year=365.0 * 24 * 3600,
-                   month=365.0 * 24 * 3600 / 12,
-                   week=7.0 * 24 * 3600,
-                   day=24.0 * 3600,
-                   hour=3600.0,
-                   minute=60.0,
-                   second=1.0)
-
-    @classmethod
-    def parse(cls, age: str, unit: str) -> Self:
-        def fail():
-            return ValueError(f"Cannot convert age '{age}' with unit '{unit}' to an AgeRange object")
-
-        age_ = [s.strip() for s in age.split('-')]
-        unit_ = unit.lower().strip()
-
-        try:
-            factor = cls.FACTORS[unit_]
-        except KeyError as e1:
-            if unit_.endswith('s'):
-                try:
-                    factor = cls.FACTORS[unit_[:-1]]
-                except KeyError as e2:
-                    raise fail() from e2
-            else:
-                raise fail() from e1
-
-        def cvt(value: str, default: float) -> float | None:
-            assert isinstance(default, float)
-            try:
-                return factor * float(value) if value else default
-            except ValueError as e:
-                raise fail() from e
-
-        if len(age_) in (1, 2):
-            self = cls(min=cvt(age_[0], 0.0), max=cvt(age_[-1], float('inf')))
-            return self
-        else:
-            raise fail()
-
-    def __post_init__(self):
-        for v in self.min, self.max:
-            if not isinstance(v, float):
-                raise TypeError("Constructor arguments must be float values", v)
-
-    any = None
-
-
-AgeRange.any = AgeRange(min=0.0, max=float('inf'))
diff --git a/src/humancellatlas/data/metadata/api.py b/src/humancellatlas/data/metadata/api.py
deleted file mode 100644
index 757bc8a70e..0000000000
--- a/src/humancellatlas/data/metadata/api.py
+++ /dev/null
@@ -1,1165 +0,0 @@
-from abc import (
-    ABCMeta,
-    abstractmethod,
-)
-from collections import (
-    defaultdict,
-)
-from dataclasses import (
-    dataclass,
-    field,
-    fields,
-)
-from datetime import (
-    datetime,
-    timezone,
-)
-from itertools import (
-    chain,
-)
-from typing import (
-    Collection,
-    Iterable,
-    Mapping,
-    MutableMapping,
-    Self,
-    TypeVar,
-)
-from uuid import (
-    UUID,
-)
-import warnings
-
-from furl import (
-    furl,
-)
-
-from azul import (
-    R,
-    cached_property,
-)
-from azul.collections import (
-    OrderedSet,
-    adict,
-    dict_merge,
-)
-
-from azul.indexer.document import (
-    EntityReference,
-)
-from azul.types import (
-    JSON,
-    MutableJSON,
-    is_optional,
-)
-from humancellatlas.data.metadata.age_range import (
-    AgeRange,
-)
-from humancellatlas.data.metadata.datetime import (
-    parse_jsonschema_date_time,
-)
-from humancellatlas.data.metadata.lookup import (
-    LookupDefault,
-    lookup,
-)
-
-# A few helpful type aliases
-#
-UUID4 = UUID
-
-
-@dataclass(init=False)
-class ManifestEntry:
-    json: JSON = field(init=False, repr=False)
-    content_type: str = field(init=False)
-    crc32c: str
-    indexed: bool
-    name: str
-    s3_etag: str | None
-    sha1: str | None
-    sha256: str
-    size: int
-    # only populated if the bundle was requested with `directurls` set
-    url: str | None
-    uuid: UUID4 = field(init=False)
-    # FIXME: Change Bundle.version and ManifestEntry.version from string to datetime
-    # https://github.com/DataBiosphere/hca-metadata-api/issues/48
-    version: str
-
-    def __init__(self, json: MutableJSON):
-        # '/' was once forbidden in file paths and was encoded with '!'. Now
-        # '/' is allowed and we force it in the metadata so that backwards
-        # compatibility is simplified downstream.
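-        # A name recorded as 'foo!bar.fastq.gz', for example, is exposed as
-        # 'foo/bar.fastq.gz' (an illustrative name, not one from real data).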
- json['name'] = json['name'].replace('!', '/') - self.json = json - self.content_type = json['content-type'] - self.uuid = UUID4(json['uuid']) - for f in fields(self): - if f.init: - value = json.get(f.name) - if value is None and not is_optional(f.type): - raise TypeError('Property cannot be absent or None', f.name) - else: - setattr(self, f.name, value) - - -valid_schema_domains = ['schema.humancellatlas.org'] - - -@dataclass(init=False) -class Entity: - json: JSON = field(repr=False) - document_id: UUID4 - submitter_id: str | None - submission_date: datetime - update_date: datetime | None - - @classmethod - def from_json(cls, - json: JSON, - **kwargs): - content = json.get('content', json) - described_by = cls.validate_described_by(content) - schema_name = described_by.path.segments[-1] - try: - sub_cls = entity_types[schema_name] - except KeyError: - raise TypeLookupError(described_by) - return sub_cls(json, **kwargs) - - @classmethod - def validate_described_by(cls, content: JSON) -> furl: - described_by = furl(content['describedBy']) - assert described_by.netloc in valid_schema_domains, R( - 'Unexpected schema domain', - described_by, - valid_schema_domains - ) - return described_by - - def __init__(self, - json: JSON - ) -> None: - super().__init__() - self.json = json - provenance = json.get('hca_ingest') or json['provenance'] - self.document_id = UUID4(provenance['document_id']) - self.submitter_id = provenance.get('submitter_id') - submission_date = lookup(provenance, 'submission_date', 'submissionDate') - self.submission_date = self._datetime(submission_date) - update_date = lookup(provenance, 'update_date', 'updateDate', default=None) - self.update_date = self._optional_datetime(update_date) - - def _datetime(self, s: str) -> datetime: - return parse_jsonschema_date_time(s).astimezone(timezone.utc) - - def _optional_datetime(self, s: str | None) -> datetime | None: - return s if s is None else self._datetime(s) - - @property - def ref(self) -> EntityReference: - return EntityReference(entity_type=self.schema_name, - entity_id=str(self.document_id)) - - @property - def schema_name(self): - return schema_names[type(self)] - - def accept(self, visitor: 'EntityVisitor') -> None: - visitor.visit(self) - - -# A type variable for subtypes of Entity -# -E = TypeVar('E', bound=Entity) - - -class TypeLookupError(Exception): - - def __init__(self, described_by: furl) -> None: - super().__init__(f"No entity type for schema URL '{described_by}'") - - -class EntityVisitor(metaclass=ABCMeta): - - @abstractmethod - def visit(self, entity: 'Entity') -> None: - raise NotImplementedError() - - -@dataclass(init=False) -class LinkedEntity(Entity, metaclass=ABCMeta): - children: MutableMapping[UUID4, Entity] = field(repr=False) - parents: MutableMapping[UUID4, 'LinkedEntity'] = field(repr=False) - - @abstractmethod - def _connect_to(self, other: Entity, forward: bool) -> None: - raise NotImplementedError() - - def __init__(self, - json: JSON - ) -> None: - super().__init__(json) - self.children = {} - self.parents = {} - - def connect_to(self, other: Entity, forward: bool) -> None: - mapping = self.children if forward else self.parents - mapping[other.document_id] = other - self._connect_to(other, forward) - - def ancestors(self, visitor: EntityVisitor): - for parent in self.parents.values(): - parent.ancestors(visitor) - visitor.visit(parent) - - def accept(self, visitor: EntityVisitor): - super().accept(visitor) - for child in self.children.values(): - child.accept(visitor) - - -class 
LinkError(RuntimeError): - - def __init__(self, entity: LinkedEntity, other_entity: Entity, forward: bool) -> None: - super().__init__( - f'{entity.ref} cannot {"reference" if forward else "be referenced by"} ' - f'{other_entity.ref}' - ) - - -L = TypeVar('L', bound=LinkedEntity) - - -@dataclass(frozen=True) -class ProjectPublication: - title: str - url: str | None - official_hca: bool | None - doi: str | None - - @classmethod - def from_json(cls, json: JSON) -> Self: - title = lookup(json, 'title', 'publication_title') - url = lookup(json, 'url', 'publication_url', default=None) - return cls(title=title, - url=url, - official_hca=json.get('official_hca_publication'), - doi=json.get('doi')) - - @property - def publication_title(self): - warnings.warn("ProjectPublication.publication_title is deprecated. " - "Use ProjectPublication.title instead.", DeprecationWarning) - return self.title - - @property - def publication_url(self): - warnings.warn("ProjectPublication.publication_url is deprecated. " - "Use ProjectPublication.url instead.", DeprecationWarning) - return self.url - - -@dataclass(frozen=True) -class ProjectContact: - name: str - email: str | None - institution: str | None # optional up to project/5.3.0/contact - laboratory: str | None - corresponding_contributor: bool | None - project_role: str | None - - @classmethod - def from_json(cls, json: JSON) -> Self: - project_role = json.get('project_role') - project_role = ontology_label(project_role) if isinstance(project_role, dict) else project_role - return cls(name=lookup(json, 'name', 'contact_name'), - email=json.get('email'), - institution=json.get('institution'), - laboratory=json.get('laboratory'), - corresponding_contributor=json.get('corresponding_contributor'), - project_role=project_role) - - @property - def contact_name(self) -> str: - warnings.warn("ProjectContact.contact_name is deprecated. 
" - "Use ProjectContact.name instead.", DeprecationWarning) - return self.name - - -@dataclass(eq=True, frozen=True) -class Accession: - namespace: str - accession: str - - -@dataclass(eq=True, frozen=True) -class Bionetwork: - name: str - atlas_project: bool | None = None - hca_tissue_atlas: str | None = None - hca_tissue_atlas_version: str | None = None - schema_version: str | None = None - - -@dataclass(init=False) -class Project(Entity): - project_short_name: str - project_title: str - project_description: str | None # optional up to core/project/5.2.2/project_core - publications: OrderedSet[ProjectPublication] - contributors: OrderedSet[ProjectContact] - accessions: OrderedSet[Accession] - supplementary_links: OrderedSet[str] - estimated_cell_count: int | None - bionetworks: OrderedSet[Bionetwork] - data_use_restriction: str | None - duos_id: str | None - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - core = content['project_core'] - self.project_short_name = lookup(core, 'project_short_name', 'project_shortname') - self.project_title = core['project_title'] - self.project_description = core.get('project_description') - self.publications = OrderedSet(ProjectPublication.from_json(publication) - for publication in content.get('publications', [])) - self.contributors = OrderedSet(ProjectContact.from_json(contributor) - for contributor in content.get('contributors', [])) - self.supplementary_links = OrderedSet(content.get('supplementary_links', [])) - self.estimated_cell_count = content.get('estimated_cell_count') - accessions = OrderedSet() - for name, value in content.items(): - prefix, _, suffix = name.rpartition('_') - if suffix == 'accessions': - assert prefix, name - assert isinstance(value, list) - accessions.update(Accession(namespace=prefix, accession=v) for v in value) - self.accessions = accessions - self.bionetworks = OrderedSet(Bionetwork(**bionetwork) - for bionetwork in content.get('hca_bionetworks', ()) - if bionetwork) - self.data_use_restriction = content.get('data_use_restriction') - self.duos_id = content.get('duos_id') - - def _accessions(self, namespace: str) -> set[str]: - return {a.accession for a in self.accessions if a.namespace == namespace} - - @property - def insdc_project_accessions(self) -> set[str]: - warnings.warn("Project.insdc_project_accessions is deprecated. " - "Use Project.accessions instead.", DeprecationWarning) - return self._accessions('insdc_project') - - @property - def geo_series_accessions(self) -> set[str]: - warnings.warn("Project.geo_series_accessions is deprecated. " - "Use Project.accessions instead.", DeprecationWarning) - return self._accessions('geo_series') - - @property - def array_express_accessions(self) -> set[str]: - warnings.warn("Project.array_express_accessions is deprecated. " - "Use Project.accessions instead.", DeprecationWarning) - return self._accessions('array_express') - - @property - def insdc_study_accessions(self) -> set[str]: - warnings.warn("Project.insdc_study_accessions is deprecated. " - "Use Project.accessions instead.", DeprecationWarning) - return self._accessions('insdc_study') - - @property - def laboratory_names(self) -> set: - warnings.warn("Project.laboratory_names is deprecated. 
" - "Use contributors.laboratory instead.", DeprecationWarning) - return {contributor.laboratory for contributor in self.contributors if contributor.laboratory} - - @property - def project_shortname(self) -> str: - warnings.warn("Project.project_shortname is deprecated. " - "Use project_short_name instead.", DeprecationWarning) - return self.project_short_name - - -@dataclass(init=False) -class Biomaterial(LinkedEntity): - biomaterial_id: str - ncbi_taxon_id: list[int] - has_input_biomaterial: str | None - from_processes: MutableMapping[UUID4, 'Process'] = field(repr=False) - to_processes: MutableMapping[UUID4, 'Process'] - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - self.biomaterial_id = content['biomaterial_core']['biomaterial_id'] - self.ncbi_taxon_id = content['biomaterial_core']['ncbi_taxon_id'] - self.has_input_biomaterial = content['biomaterial_core'].get('has_input_biomaterial') - self.from_processes = {} - self.to_processes = {} - - def _connect_to(self, other: Entity, forward: bool) -> None: - if isinstance(other, Process): - if forward: - self.to_processes[other.document_id] = other - else: - self.from_processes[other.document_id] = other - else: - raise LinkError(self, other, forward) - - -@dataclass(init=False) -class DonorOrganism(Biomaterial): - genus_species: set[str] - diseases: set[str] - organism_age: str - organism_age_unit: str - sex: str - development_stage: str | None - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - self.genus_species = {ontology_label(gs) for gs in content['genus_species']} - self.diseases = {ontology_label(d) for d in lookup(content, 'diseases', 'disease', default=[]) if d} - self.organism_age = content.get('organism_age') - self.organism_age_unit = ontology_label(content.get('organism_age_unit'), default=None) - self.sex = lookup(content, 'sex', 'biological_sex') - self.development_stage = ontology_label(content.get('development_stage'), default=None) - - @property - def organism_age_in_seconds(self) -> AgeRange | None: - if self.organism_age and self.organism_age_unit: - return AgeRange.parse(self.organism_age, self.organism_age_unit) - else: - return None - - @property - def biological_sex(self): - warnings.warn("DonorOrganism.biological_sex is deprecated. " - "Use DonorOrganism.sex instead.", DeprecationWarning) - return self.sex - - @property - def disease(self): - warnings.warn("DonorOrganism.disease is deprecated. 
" - "Use DonorOrganism.diseases instead.", DeprecationWarning) - return self.diseases - - -@dataclass(init=False) -class SpecimenFromOrganism(Biomaterial): - storage_method: str | None - preservation_method: str | None - diseases: set[str] - organ: str | None - organ_parts: set[str] - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - preservation_storage = content.get('preservation_storage') - self.storage_method = preservation_storage.get('storage_method') if preservation_storage else None - self.preservation_method = preservation_storage.get('preservation_method') if preservation_storage else None - self.diseases = {ontology_label(d) for d in lookup(content, 'diseases', 'disease', default=[]) if d} - self.organ = ontology_label(content.get('organ'), default=None) - - organ_parts = lookup(content, 'organ_parts', 'organ_part', default=[]) - if not isinstance(organ_parts, list): - organ_parts = [organ_parts] - assert isinstance(organ_parts, list) - self.organ_parts = {ontology_label(d) for d in organ_parts if d} - - @property - def disease(self): - warnings.warn("SpecimenFromOrganism.disease is deprecated. " - "Use SpecimenFromOrganism.diseases instead.", DeprecationWarning) - return self.diseases - - @property - def organ_part(self): - msg = ("SpecimenFromOrganism.organ_part has been removed. " - "Use SpecimenFromOrganism.organ_parts instead.") - warnings.warn(msg, DeprecationWarning) - raise AttributeError(msg) - - -@dataclass(init=False) -class ImagedSpecimen(Biomaterial): - slice_thickness: float | int - - def __init__(self, json: JSON) -> None: - super().__init__(json) - self.slice_thickness = json['slice_thickness'] - - -@dataclass(init=False) -class CellSuspension(Biomaterial): - estimated_cell_count: int | None - selected_cell_types: set[str] - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - self.estimated_cell_count = lookup(content, 'estimated_cell_count', 'total_estimated_cells', default=None) - self.selected_cell_types = {ontology_label(sct) for sct in - lookup(content, 'selected_cell_types', 'selected_cell_type', default=[])} - - @property - def total_estimated_cells(self) -> int: - warnings.warn("CellSuspension.total_estimated_cells is deprecated. " - "Use CellSuspension.estimated_cell_count instead.", DeprecationWarning) - return self.estimated_cell_count - - @property - def selected_cell_type(self) -> set[str]: - warnings.warn("CellSuspension.selected_cell_type is deprecated. " - "Use CellSuspension.selected_cell_types instead.", DeprecationWarning) - return self.selected_cell_types - - -@dataclass(init=False) -class CellLine(Biomaterial): - type: str - model_organ: str | None - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - self.type = lookup(content, 'type', 'cell_line_type') - self.model_organ = ontology_label(content.get('model_organ'), default=None) - - @property - def cell_line_type(self) -> str: - warnings.warn("CellLine.cell_line_type is deprecated. 
" - "Use CellLine.type instead.", DeprecationWarning) - return self.type - - -@dataclass(init=False) -class Organoid(Biomaterial): - model_organ: str - model_organ_part: str | None - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - self.model_organ = ontology_label(lookup(content, 'model_organ', 'model_for_organ'), default=None) - self.model_organ_part = ontology_label(content.get('model_organ_part'), default=None) - - -@dataclass(init=False) -class Process(LinkedEntity): - process_id: str - process_name: str | None - input_biomaterials: MutableMapping[UUID4, Biomaterial] = field(repr=False) - input_files: MutableMapping[UUID4, 'File'] = field(repr=False) - output_biomaterials: MutableMapping[UUID4, Biomaterial] - output_files: MutableMapping[UUID4, 'File'] - protocols: MutableMapping[UUID4, 'Protocol'] - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - process_core = content['process_core'] - self.process_id = process_core['process_id'] - self.process_name = process_core.get('process_name') - self.input_biomaterials = {} - self.input_files = {} - self.output_biomaterials = {} - self.output_files = {} - self.protocols = {} - - def _connect_to(self, other: Entity, forward: bool) -> None: - if isinstance(other, Biomaterial): - biomaterials = self.output_biomaterials if forward else self.input_biomaterials - biomaterials[other.document_id] = other - elif isinstance(other, File): - files = self.output_files if forward else self.input_files - files[other.document_id] = other - elif isinstance(other, Protocol): - if forward: - self.protocols[other.document_id] = other - else: - raise LinkError(self, other, forward) - else: - raise LinkError(self, other, forward) - - def is_sequencing_process(self): - return any(isinstance(pl, SequencingProtocol) for pl in self.protocols.values()) - - -@dataclass(init=False) -class AnalysisProcess(Process): - pass - - -@dataclass(init=False) -class DissociationProcess(Process): - - def __init__(self, json: JSON) -> None: - warnings.warn(f"{type(self)} is deprecated", DeprecationWarning) - super().__init__(json) - - -@dataclass(init=False) -class EnrichmentProcess(Process): - - def __init__(self, json: JSON) -> None: - warnings.warn(f"{type(self)} is deprecated", DeprecationWarning) - super().__init__(json) - - -@dataclass(init=False) -class LibraryPreparationProcess(Process): - library_construction_approach: str - - def __init__(self, json: JSON) -> None: - warnings.warn(f"{type(self)} is deprecated", DeprecationWarning) - super().__init__(json) - content = json.get('content', json) - self.library_construction_approach = content['library_construction_approach'] - - -@dataclass(init=False) -class SequencingProcess(Process): - instrument_manufacturer_model: str - - def __init__(self, json: JSON) -> None: - warnings.warn(f"{type(self)} is deprecated", DeprecationWarning) - super().__init__(json) - content = json.get('content', json) - self.instrument_manufacturer_model = ontology_label(content['instrument_manufacturer_model']) - - def is_sequencing_process(self): - return True - - -@dataclass(frozen=True) -class ImagingProbe: - assay_type: str - - @classmethod - def from_json(cls, json: JSON) -> Self: - assay_type = ontology_label(json['assay_type']) - return cls(assay_type=assay_type) - - -@dataclass(init=False) -class Protocol(LinkedEntity): - protocol_id: str - protocol_name: str | None - - def __init__(self, json: JSON) -> None: - 
super().__init__(json) - content = json.get('content', json) - protocol_core = content['protocol_core'] - self.protocol_id = protocol_core['protocol_id'] - self.protocol_name = protocol_core.get('protocol_name') - - def _connect_to(self, other: Entity, forward: bool) -> None: - if isinstance(other, Process) and not forward: - pass # no explicit, typed back reference - else: - raise LinkError(self, other, forward) - - -@dataclass(init=False) -class LibraryPreparationProtocol(Protocol): - library_construction_method: str - nucleic_acid_source: str | None - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - temp = lookup(content, 'library_construction_method', 'library_construction_approach') - self.library_construction_method = ontology_label(temp) if isinstance(temp, dict) else temp - self.nucleic_acid_source = content.get('nucleic_acid_source') - - @property - def library_construction_approach(self) -> str: - warnings.warn("LibraryPreparationProtocol.library_construction_approach is deprecated. " - "Use LibraryPreparationProtocol.library_construction_method instead.", DeprecationWarning) - return self.library_construction_method - - -@dataclass(init=False) -class SequencingProtocol(Protocol): - instrument_manufacturer_model: str - paired_end: bool | None - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - self.instrument_manufacturer_model = ontology_label(content.get('instrument_manufacturer_model'), default=None) - self.paired_end = lookup(content, 'paired_end', 'paired_ends', default=None) - - -@dataclass(init=False) -class AnalysisProtocol(Protocol): - pass - - -@dataclass(init=False) -class AggregateGenerationProtocol(Protocol): - pass - - -@dataclass(init=False) -class CollectionProtocol(Protocol): - pass - - -@dataclass(init=False) -class DifferentiationProtocol(Protocol): - pass - - -@dataclass(init=False) -class DissociationProtocol(Protocol): - pass - - -@dataclass(init=False) -class EnrichmentProtocol(Protocol): - pass - - -@dataclass(init=False) -class IpscInductionProtocol(Protocol): - pass - - -@dataclass(init=False) -class TreatmentProtocol(Protocol): - pass - - -@dataclass(init=False) -class ImagingProtocol(Protocol): - probe: list[ImagingProbe] # A list so all the ImagingProbe objects can be tallied when indexed - - def __init__(self, json: JSON) -> None: - super().__init__(json) - content = json.get('content', json) - self.probe = [ - ImagingProbe.from_json(probe) - for probe in lookup(content, 'probe', 'target', default=[]) - ] - - @property - def target(self) -> list[ImagingProbe]: - warnings.warn('ImagingProtocol.target is deprecated. ' - 'Use ImagingProtocol.probe instead.', DeprecationWarning) - return self.probe - - -@dataclass(init=False) -class ImagingPreparationProtocol(Protocol): - pass - - -@dataclass(init=False) -class File(LinkedEntity): - format: str - from_processes: MutableMapping[UUID4, Process] = field(repr=False) - to_processes: MutableMapping[UUID4, Process] - manifest_entry: ManifestEntry - content_description: set[str] - file_source: str - - def __init__(self, - json: JSON, - manifest_entry: ManifestEntry): - super().__init__(json) - content = json.get('content', json) - # '/' was once forbidden in file paths and was encoded with '!'. Now - # '/' is allowed and we force it in the metadata so that backwards - # compatibility is simplified downstream. 
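-        # Note that ManifestEntry.__init__ applies the same rewriting to the
-        # manifest's copy of the file name, so the two stay consistent.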
- core = content['file_core'] - core['file_name'] = core['file_name'].replace('!', '/') - self.format = lookup(core, 'format', 'file_format') - self.manifest_entry = manifest_entry - self.content_description = {ontology_label(cd) for cd in core.get('content_description', [])} - self.file_source = core.get('file_source') - self.from_processes = dict() - self.to_processes = dict() - - def _connect_to(self, other: Entity, forward: bool) -> None: - if isinstance(other, Process): - if forward: - self.to_processes[other.document_id] = other - else: - self.from_processes[other.document_id] = other - else: - raise LinkError(self, other, forward) - - @property - def file_format(self) -> str: - warnings.warn("File.file_format is deprecated. " - "Use File.format instead.", DeprecationWarning) - return self.format - - @property - def is_matrix(self) -> bool: - return any('matrix' in c.lower() for c in self.content_description) - - -@dataclass(init=False) -class SequenceFile(File): - read_index: str - lane_index: str | None - - def __init__(self, - json: JSON, - manifest_entry: ManifestEntry): - super().__init__(json, manifest_entry) - content = json.get('content', json) - self.read_index = content['read_index'] - self.lane_index = content.get('lane_index') - - -@dataclass(init=False) -class SupplementaryFile(File): - pass - - -@dataclass(init=False) -class AnalysisFile(File): - matrix_cell_count: int - - def __init__(self, - json: JSON, - manifest_entry: ManifestEntry): - super().__init__(json, manifest_entry) - content = json.get('content', json) - self.matrix_cell_count = content.get('matrix_cell_count') - - -@dataclass(init=False) -class ReferenceFile(File): - pass - - -@dataclass(init=False) -class ImageFile(File): - pass - - -@dataclass -class Link: - source_id: UUID4 - source_type: str - destination_id: UUID4 - destination_type: str - link_type: str = 'process_link' - - @classmethod - def from_json(cls, json: JSON, schema_version: tuple[int, ...]) -> Iterable['Link']: - if 'source_id' in json: - # DCP/1 v5 (obsolete) - yield cls(source_id=UUID4(json['source_id']), - source_type=json['source_type'], - destination_id=UUID4(json['destination_id']), - destination_type=json['destination_type']) - elif schema_version[0] == 1: - # DCP/1 vx (current) - process_id = UUID4(json['process']) - for source_id in json['inputs']: - yield cls(source_id=UUID4(source_id), - source_type=json['input_type'], - destination_id=process_id, - destination_type='process') - for destination_id in json['outputs']: - yield cls(source_id=process_id, - source_type='process', - destination_id=UUID4(destination_id), - destination_type=json['output_type']) - for protocol in json['protocols']: - yield cls(source_id=process_id, - source_type='process', - destination_id=UUID4(protocol['protocol_id']), - destination_type=lookup(protocol, 'type', 'protocol_type')) - elif schema_version[0] in (2, 3): - # DCP/2 (current) - link_type = json['link_type'] - if link_type == 'process_link': - process_id = UUID4(json['process_id']) - process_type = json['process_type'] - for input_ in json['inputs']: - yield cls(link_type=link_type, - source_id=UUID4(input_['input_id']), - source_type=input_['input_type'], - destination_id=process_id, - destination_type=process_type) - for output in json['outputs']: - yield cls(link_type=link_type, - source_id=process_id, - source_type=process_type, - destination_id=UUID4(output['output_id']), - destination_type=output['output_type']) - for protocol in json['protocols']: - yield cls(link_type=link_type, - 
source_id=process_id, - source_type=process_type, - destination_id=UUID4(protocol['protocol_id']), - destination_type=protocol['protocol_type']) - elif link_type == 'supplementary_file_link': - entity = json['entity'] - for supp_file in json['files']: - yield cls(link_type=link_type, - source_id=UUID4(entity['entity_id']), - source_type=entity['entity_type'], - destination_id=UUID4(supp_file['file_id']), - destination_type=supp_file['file_type']) - else: - assert False, f'Unknown link_type {link_type}' - else: - assert False, f'Unknown schema_version {schema_version}' - - -@dataclass(init=False) -class Bundle: - uuid: UUID4 - # FIXME: Change Bundle.version and ManifestEntry.version from string to datetime - # https://github.com/DataBiosphere/hca-metadata-api/issues/48 - version: str - projects: MutableMapping[UUID4, Project] - biomaterials: MutableMapping[UUID4, Biomaterial] - processes: MutableMapping[UUID4, Process] - protocols: MutableMapping[UUID4, Protocol] - files: MutableMapping[UUID4, File] - - manifest: MutableMapping[str, ManifestEntry] - entities: MutableMapping[UUID4, Entity] = field(repr=False) - links: list[Link] - - def __init__(self, - uuid: str, - version: str, - manifest: Mapping[str, MutableJSON], - metadata: Mapping[str, MutableJSON], - links_json: JSON, - stitched_entity_ids: Collection[str] = ()): - self.uuid = UUID4(uuid) - self.version = version - self.manifest = {ref: ManifestEntry(e) for ref, e in manifest.items()} - self.stitched = frozenset(map(UUID4, stitched_entity_ids)) - - entity_args_by_core_cls: MutableMapping[type[E], list[dict]] = defaultdict(list) - for key, json in metadata.items(): - schema_name = EntityReference.parse(key).entity_type - entity_cls = entity_types[schema_name] - core_cls = core_types[entity_cls] - args = adict(json=json, - manifest_entry=self.manifest.get(key)) - entity_args_by_core_cls[core_cls].append(args) - - def from_json_vx(core_cls: type[E], - ) -> MutableMapping[UUID4, E]: - args_list = entity_args_by_core_cls[core_cls] - entities = ( - core_cls.from_json(**args) - for args in args_list - ) - return {entity.document_id: entity for entity in entities} - - self.projects = from_json_vx(Project) - self.biomaterials = from_json_vx(Biomaterial) - self.processes = from_json_vx(Process) - self.protocols = from_json_vx(Protocol) - self.files = from_json_vx(File) - - self.entities = {**self.projects, **self.biomaterials, **self.processes, **self.protocols, **self.files} - - Entity.validate_described_by(links_json) - schema_version = tuple(map(int, links_json['schema_version'].split('.'))) - self.links = list(chain.from_iterable( - Link.from_json(link, schema_version) - for link in links_json['links'] - )) - - for link in self.links: - if link.link_type == 'process_link': - source_entity = self.entities[link.source_id] - destination_entity = self.entities[link.destination_id] - assert isinstance(source_entity, LinkedEntity) - assert isinstance(destination_entity, LinkedEntity) - source_entity.connect_to(destination_entity, forward=True) - destination_entity.connect_to(source_entity, forward=False) - - def root_entities(self) -> Mapping[UUID4, LinkedEntity]: - roots = {} - - class RootFinder(EntityVisitor): - - def visit(self, entity: Entity) -> None: - if isinstance(entity, LinkedEntity) and not entity.parents: - roots[entity.document_id] = entity - - visitor = RootFinder() - for entity in self.entities.values(): - entity.accept(visitor) - - return roots - - def leaf_entities(self, entity_type: type[L]) -> Mapping[UUID4, L]: - """ - 
Return a set of all leaf entities in this bundle. A leaf entity is a - linked entity of a given type that has no descendants of that type. - """ - - empty = {} - - def recurse(entities: Iterable[Entity]) -> Mapping[UUID4, L]: - # Unroll the first two iterations over the argument in order to - # accelerate the common cases - i = iter(entities) - try: - entity = next(i) - except StopIteration: - return empty - else: - try: - next(i) - except StopIteration: - return visit(entity) - else: - return dict_merge(map(visit, entities)) - - def visit(entity: Entity) -> Mapping[UUID4, L]: - if isinstance(entity, LinkedEntity): - leafs = recurse(entity.children.values()) - if leafs: - return leafs - elif isinstance(entity, entity_type): - return {entity.document_id: entity} - return empty - - return recurse(self.root_entities().values()) - - def not_stitched(self, entities: Mapping[UUID, E]) -> list[E]: - return [ - entity - for uuid, entity in entities.items() - if uuid not in self.stitched - ] - - @cached_property - def leaf_cell_suspensions(self) -> Mapping[UUID4, CellSuspension]: - return self.leaf_entities(CellSuspension) - - @property - def specimens(self) -> list[SpecimenFromOrganism]: - return [s for s in self.biomaterials.values() if isinstance(s, SpecimenFromOrganism)] - - @property - def sequencing_input(self) -> list[Biomaterial]: - return [bm for bm in self.biomaterials.values() - if any(ps.is_sequencing_process() for ps in bm.to_processes.values())] - - @property - def sequencing_output(self) -> list[SequenceFile]: - return [f for f in self.files.values() - if isinstance(f, SequenceFile) - and any(ps.is_sequencing_process() for ps in f.from_processes.values())] - - @property - def ref(self) -> EntityReference: - return EntityReference(entity_type='links', entity_id=str(self.uuid)) - - -entity_types = { - # Biomaterials - 'donor_organism': DonorOrganism, - 'specimen_from_organism': SpecimenFromOrganism, - 'cell_suspension': CellSuspension, - 'cell_line': CellLine, - 'organoid': Organoid, - 'imaged_specimen': ImagedSpecimen, - - # Files - 'analysis_file': AnalysisFile, - 'reference_file': ReferenceFile, - 'sequence_file': SequenceFile, - 'supplementary_file': SupplementaryFile, - 'image_file': ImageFile, - - # Protocols - 'protocol': Protocol, - 'analysis_protocol': AnalysisProtocol, - 'aggregate_generation_protocol': AggregateGenerationProtocol, - 'collection_protocol': CollectionProtocol, - 'differentiation_protocol': DifferentiationProtocol, - 'dissociation_protocol': DissociationProtocol, - 'enrichment_protocol': EnrichmentProtocol, - 'ipsc_induction_protocol': IpscInductionProtocol, - 'imaging_protocol': ImagingProtocol, - 'library_preparation_protocol': LibraryPreparationProtocol, - 'sequencing_protocol': SequencingProtocol, - 'imaging_preparation_protocol': ImagingPreparationProtocol, - 'treatment_protocol': TreatmentProtocol, - - 'project': Project, - - # Processes - 'process': Process, - 'analysis_process': AnalysisProcess, - 'dissociation_process': DissociationProcess, - 'enrichment_process': EnrichmentProcess, - 'library_preparation_process': LibraryPreparationProcess, - 'sequencing_process': SequencingProcess -} - -schema_names = { - v: k for k, v in entity_types.items() -} - -core_types = { - entity_type: core_type - for core_type in (Project, Biomaterial, Process, Protocol, File) - for entity_type in entity_types.values() - if issubclass(entity_type, core_type) -} - -assert len(entity_types) == len(schema_names), "The mapping from schema name to entity type is not bijective" - 
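-
-# A hypothetical usage sketch, not part of the original module: these mappings
-# resolve a schema name to its concrete entity class, that class to one of the
-# five core types, and the class back to its schema name:
-#
-#     assert entity_types['sequence_file'] is SequenceFile
-#     assert core_types[SequenceFile] is File
-#     assert schema_names[SequenceFile] == 'sequence_file'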
-
-def ontology_label(ontology: Mapping[str, str] | None,
-                   default: str | LookupDefault | None = LookupDefault.RAISE) -> str:
-    """
-    Return the best-suited value from the given ontology dictionary.
-
-    >>> ontology_label({'ontology_label': '1', 'text': '2', 'ontology': '3'})
-    '1'
-
-    >>> ontology_label({'text': '2', 'ontology': '3'})
-    '2'
-
-    >>> ontology_label({'ontology': '3'})
-    '3'
-
-    >>> ontology_label({}, default=None)
-    >>> ontology_label({}, default='default')
-    'default'
-
-    >>> ontology_label(None, default=None)
-    >>> ontology_label(None, default='default')
-    'default'
-
-    >>> ontology_label({})
-    Traceback (most recent call last):
-    ...
-    KeyError: 'ontology_label'
-
-    >>> ontology_label(None)
-    Traceback (most recent call last):
-    ...
-    TypeError: 'NoneType' object is not subscriptable
-    """
-    if ontology is None and default is not LookupDefault.RAISE:
-        return default
-    else:
-        return lookup(ontology, 'ontology_label', 'text', 'ontology', default=default)
diff --git a/src/humancellatlas/data/metadata/datetime.py b/src/humancellatlas/data/metadata/datetime.py
deleted file mode 100644
index 66502b4c04..0000000000
--- a/src/humancellatlas/data/metadata/datetime.py
+++ /dev/null
@@ -1,147 +0,0 @@
-from datetime import (
-    datetime,
-    timedelta,
-    timezone,
-)
-import re
-
-
-def parse_jsonschema_date_time(s: str) -> datetime:
-    """
-    Convert a string in JSONSchema `date-time` format
-
-    https://json-schema.org/understanding-json-schema/reference/string.html#dates-and-times
-
-    to a timezone-aware `datetime` instance. Only up to 6 digits of fractional
-    seconds are supported. This is a deviation from the standard, which allows
-    an arbitrary (and impracticably large) number of digits, but Python does
-    not support more, and silent truncation or rounding is not a good option.
-    I have never observed more than six digits in the wild, anyway.
-
-    No fractional seconds, UTC
-
-    >>> parse_jsonschema_date_time('2021-05-05T21:24:26Z')
-    datetime.datetime(2021, 5, 5, 21, 24, 26, tzinfo=datetime.timezone.utc)
-
-    Fractional seconds, UTC
-
-    >>> parse_jsonschema_date_time('2021-05-05T21:24:26.174274Z')
-    datetime.datetime(2021, 5, 5, 21, 24, 26, 174274, tzinfo=datetime.timezone.utc)
-
-    Same with zero offset
-
-    >>> parse_jsonschema_date_time('2021-05-05T21:24:26.174274+00:00')
-    datetime.datetime(2021, 5, 5, 21, 24, 26, 174274, tzinfo=datetime.timezone.utc)
-
-    Same with negative zero offset
-
-    >>> parse_jsonschema_date_time('2021-05-05T21:24:26.174274-00:00')
-    datetime.datetime(2021, 5, 5, 21, 24, 26, 174274, tzinfo=datetime.timezone.utc)
-
-    Short fraction:
-
-    >>> parse_jsonschema_date_time('2021-05-05T21:24:26.5Z')
-    datetime.datetime(2021, 5, 5, 21, 24, 26, 500000, tzinfo=datetime.timezone.utc)
-
-    Overlong fraction:
-
-    >>> parse_jsonschema_date_time('2021-05-05T21:24:26.1234567Z')
-    Traceback (most recent call last):
-    ...
-    ValueError: ('Not an RFC-3339 datetime', '2021-05-05T21:24:26.1234567Z')
-
-
-    >>> s1 = '2021-05-05T21:24:26.174274+00:00'
-    >>> s2 = '2021-05-05T14:24:26.174274-07:00'
-    >>> d1 = parse_jsonschema_date_time(s1)
-    >>> d2 = parse_jsonschema_date_time(s2)
-    >>> d1
-    datetime.datetime(2021, 5, 5, 21, 24, 26, 174274, tzinfo=datetime.timezone.utc)
-
-    >>> d1 == d2
-    True
-
-    >>> d1.tzinfo == d2.tzinfo
-    False
-
-    >>> parse_jsonschema_date_time('')
-    Traceback (most recent call last):
-    ...
-    ValueError: ('Not an RFC-3339 datetime', '')
-
-    Missing colon in offset:
-
-    >>> parse_jsonschema_date_time('2021-05-05T14:24:26.174274-0700')
-    Traceback (most recent call last):
-    ...
-    ValueError: ('Not an RFC-3339 datetime', '2021-05-05T14:24:26.174274-0700')
-
-    Out of range hour: (this is just a sample; we're relying on datetime to
-    enforce ranges on all components)
-
-    >>> parse_jsonschema_date_time('2021-05-05T24:24:26Z')
-    Traceback (most recent call last):
-    ...
-    ValueError: hour must be in 0..23
-
-    Out of range time offset:
-
-    >>> parse_jsonschema_date_time('2021-05-05T21:24:26.174274-24:00') #doctest: +ELLIPSIS
-    Traceback (most recent call last):
-    ...
-    ValueError: offset must be a timedelta strictly between -timedelta(hours=24) and timedelta(hours=24), not ...
-
-    2020 was a leap year
-
-    >>> parse_jsonschema_date_time('2020-02-29T00:00:00.0Z')
-    datetime.datetime(2020, 2, 29, 0, 0, tzinfo=datetime.timezone.utc)
-
-    2021 was not
-
-    >>> parse_jsonschema_date_time('2021-02-29T00:00:00.0Z')
-    Traceback (most recent call last):
-    ...
-    ValueError: day is out of range for month
-    """
-    pattern = re.compile(r'''
-        (?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})
-        [Tt]
-        (?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})
-        (?:
-            \.(?P<fractional_second>\d{1,6})
-        )?
-        (?:
-            (?P<zulu>[Zz])
-            |
-            (?P<offset_sign>[+-])(?P<offset_hour>\d{2}):(?P<offset_minute>\d{2})
-        )
-    ''', flags=re.VERBOSE)
-    m = pattern.fullmatch(s)
-    if m:
-        g = m.groupdict()
-        year, month, day = int(g['year']), int(g['month']), int(g['day'])
-        hour, minute, second = int(g['hour']), int(g['minute']), int(g['second'])
-        fractional_second = g['fractional_second']
-        if fractional_second is None:
-            microsecond = 0
-        else:
-            microsecond = int(fractional_second.ljust(6, '0'))
-        if g['zulu']:
-            tzinfo = timezone.utc
-        else:
-            offset_hour, offset_minute = int(g['offset_hour']), int(g['offset_minute'])
-            if offset_hour == 0 and offset_minute == 0:
-                tzinfo = timezone.utc
-            else:
-                sign = g['offset_sign']
-                if sign == '-':
-                    sign = -1
-                elif sign == '+':
-                    sign = 1
-                else:
-                    assert False, sign
-                delta = sign * timedelta(hours=offset_hour, minutes=offset_minute)
-                tzinfo = timezone(offset=delta)
-        return datetime(year, month, day, hour, minute, second, microsecond, tzinfo=tzinfo)
-    else:
-        raise ValueError('Not an RFC-3339 datetime', s)
diff --git a/src/humancellatlas/data/metadata/helpers/json.py b/src/humancellatlas/data/metadata/helpers/json.py
deleted file mode 100644
index 464f29afe6..0000000000
--- a/src/humancellatlas/data/metadata/helpers/json.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import copy
-from dataclasses import (
-    field,
-    fields,
-    is_dataclass,
-)
-from uuid import (
-    UUID,
-)
-
-from humancellatlas.data.metadata.api import (
-    Entity,
-)
-
-
-def as_json(obj, fld: field = None):
-    if is_dataclass(obj):
-        d = {f.name: as_json(getattr(obj, f.name), f) for f in fields(obj) if f.repr}
-        if isinstance(obj, Entity):
-            d['schema_name'] = obj.schema_name
-        return d
-    elif isinstance(obj, (list, tuple, set)):
-        return [as_json(v) for v in obj]
-    elif isinstance(obj, dict):
-        if fld:
-            # Convert Mapping[UUID, Entity] to List[Entity]. In a JSON structure we typically don't want dynamic keys.
-            # That makes it easier to descend a JSON structure using dotted field paths.
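-            # Bundle.files, for example, is declared as a mapping of UUID to
-            # File and is therefore serialized as a JSON array of file objects
-            # rather than as an object keyed by UUID strings.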
diff --git a/src/humancellatlas/data/metadata/helpers/schema_validation.py b/src/humancellatlas/data/metadata/helpers/schema_validation.py
deleted file mode 100644
index 3a96d3dffc..0000000000
--- a/src/humancellatlas/data/metadata/helpers/schema_validation.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from functools import (
-    lru_cache,
-)
-import json
-import logging
-
-from jsonschema import (
-    FormatChecker,
-    ValidationError,
-)
-from jsonschema.validators import (
-    Draft202012Validator,
-)
-from referencing import (
-    Registry,
-    Resource,
-)
-import requests
-
-from azul import (
-    R,
-    cached_property,
-)
-from azul.types import (
-    JSON,
-)
-
-log = logging.getLogger(__name__)
-
-
-class SchemaValidator:
-
-    def validate_json(self, file_json: JSON, file_name: str):
-        try:
-            schema = self._download_json_file(file_json['describedBy'])
-        except json.decoder.JSONDecodeError:
-            schema_url = file_json['describedBy']
-            assert False, R('Failed to parse schema JSON', file_name, schema_url)
-        self.validator = self.validator.evolve(schema=schema)
-        try:
-            self.validator.validate(file_json)
-        except ValidationError as e:
-            assert False, R(*e.args, file_name)
-
-    @lru_cache(maxsize=None)
-    def _download_json_file(self, file_url: str) -> JSON:
-        response = requests.get(file_url, allow_redirects=False)
-        response.raise_for_status()
-        return response.json()
-
-    def _retrieve_resource(self, resource_url: str) -> Resource:
-        file_json = self._download_json_file(resource_url)
-        return Resource.from_contents(file_json)
-
-    @cached_property
-    def validator(self) -> Draft202012Validator:
-        registry = Registry(retrieve=self._retrieve_resource)
-        return Draft202012Validator(schema={},
-                                    registry=registry,
-                                    format_checker=FormatChecker())
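The Registry(retrieve=...) wiring above is the load-bearing part of SchemaValidator: schema documents referenced via $ref are resolved lazily through the callback. A self-contained sketch of the same pattern, with a canned in-memory schema standing in for the HTTP fetch (the URL is made up):

from jsonschema import FormatChecker
from jsonschema.validators import Draft202012Validator
from referencing import Registry, Resource

def retrieve(url: str) -> Resource:
    # SchemaValidator downloads `url` here; we answer from memory so that
    # the sketch runs offline.
    return Resource.from_contents({
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': ['schema_type'],
    })

validator = Draft202012Validator(schema={'$ref': 'https://example.org/schema'},
                                 registry=Registry(retrieve=retrieve),
                                 format_checker=FormatChecker())
validator.validate({'schema_type': 'links'})  # passes; a missing key would raise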
diff --git a/src/humancellatlas/data/metadata/helpers/staging_area.py b/src/humancellatlas/data/metadata/helpers/staging_area.py
deleted file mode 100644
index 7bda07e3e6..0000000000
--- a/src/humancellatlas/data/metadata/helpers/staging_area.py
+++ /dev/null
@@ -1,318 +0,0 @@
-from collections import (
-    defaultdict,
-)
-import json
-import logging
-from pathlib import (
-    Path,
-)
-from typing import (
-    ClassVar,
-    Mapping,
-    Self,
-    Sequence,
-    TypeVar,
-)
-from uuid import (
-    UUID,
-    uuid5,
-)
-
-import attr
-from furl import (
-    furl,
-)
-
-import git
-
-from azul import (
-    reject,
-    require,
-)
-from azul.indexer.document import (
-    EntityReference,
-)
-from azul.types import (
-    JSON,
-    MutableJSON,
-)
-from humancellatlas.data.metadata.api import (
-    Bundle,
-)
-from humancellatlas.data.metadata.helpers.schema_validation import (
-    SchemaValidator,
-)
-
-log = logging.getLogger(__name__)
-
-
-@attr.s(frozen=True, kw_only=True, auto_attribs=True)
-class JsonFile:
-    """
-    A JSON file in the staging area.
-    """
-    uuid: str
-    version: str
-    name: str
-    content: MutableJSON
-    _validator: ClassVar[SchemaValidator] = SchemaValidator()
-
-    def __attrs_post_init__(self):
-        self._validator.validate_json(self.content, self.name)
-
-    @classmethod
-    def from_json(cls, file_name: str, content: MutableJSON) -> 'JsonFile':
-        def parse_file_name(file_name: str) -> Sequence[str]:
-            suffix = '.json'
-            assert file_name.endswith(suffix), file_name
-            return file_name[:-len(suffix)].split('_')
-
-        schema_type = content['schema_type']
-        if schema_type == 'links':
-            subgraph_id, version, project_id = parse_file_name(file_name)
-            return LinksFile(uuid=subgraph_id,
-                             version=version,
-                             name=file_name,
-                             content=content,
-                             project_id=project_id)
-        else:
-            entity_id, version = parse_file_name(file_name)
-            if schema_type == 'file_descriptor':
-                return DescriptorFile(uuid=entity_id,
-                                      version=version,
-                                      name=file_name,
-                                      content=content)
-            else:  # 'biomaterial', 'protocol', 'file', ...
-                return MetadataFile(uuid=entity_id,
-                                    version=version,
-                                    name=file_name,
-                                    content=content)
-
-
-@attr.s(frozen=True, kw_only=True, auto_attribs=True)
-class LinksFile(JsonFile):
-    """
-    A file describing the links between entities in a subgraph.
-    """
-    project_id: str
-
-
-@attr.s(frozen=True, kw_only=True, auto_attribs=True)
-class MetadataFile(JsonFile):
-    """
-    A file describing one entity (e.g. biomaterial, protocol) in a subgraph.
-    """
-    pass
-
-
-@attr.s(frozen=True, kw_only=True, auto_attribs=True)
-class DescriptorFile(JsonFile):
-    """
-    A file containing the checksums and other information for asserting the
-    integrity of a data file.
-    """
-    namespace: ClassVar[UUID] = UUID('5767014a-c431-4019-8703-0ab1b3e9e4d0')
-
-    @property
-    def manifest_entry(self):
-        """
-        The content of a descriptor, transformed into a format ready to create
-        a ManifestEntry object.
-        """
-        return {
-            'content-type': self.content['content_type'],
-            'crc32c': self.content['crc32c'],
-            'indexed': False,
-            'name': self.content['file_name'],
-            's3_etag': self.content['s3_etag'],
-            'sha1': self.content['sha1'],
-            'sha256': self.content['sha256'],
-            'size': self.content['size'],
-            'uuid': str(uuid5(self.namespace, self.content['file_name'])),
-            'version': self.content['file_version']
-        }
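The uuid5 call above is what makes descriptor-derived file UUIDs stable: hashing the file name under a fixed namespace yields the same UUID on every run, so repeated loads of a staging area produce identical manifest entries. A quick illustration (the file name is made up):

from uuid import UUID, uuid5

namespace = UUID('5767014a-c431-4019-8703-0ab1b3e9e4d0')
assert uuid5(namespace, 'r1.fastq.gz') == uuid5(namespace, 'r1.fastq.gz')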
-
-
-@attr.s(frozen=True, kw_only=True, auto_attribs=True)
-class StagingArea:
-    links: Mapping[str, LinksFile]  # Key is the subgraph ID aka links_id
-    metadata: Mapping[str, MetadataFile]  # Key is the entity ID
-    descriptors: Mapping[str, DescriptorFile]  # Key is the entity ID
-
-    def get_bundle(self, subgraph_id: str) -> Bundle:
-        """
-        Return a bundle from the staging area.
-        """
-        version, manifest, metadata, links = self.get_bundle_parts(subgraph_id)
-        return Bundle(subgraph_id, version, manifest, metadata, links)
-
-    def get_bundle_parts(self,
-                         subgraph_id: str
-                         ) -> tuple[str, MutableJSON, MutableJSON, MutableJSON]:
-        """
-        Return the components needed to create a bundle from the staging area.
-        """
-        links_file = self.links[subgraph_id]
-        manifest = {}
-        metadata = {}
-        entity_ids_by_type = self._entity_ids_by_type(subgraph_id)
-        for entity_type, entity_ids in entity_ids_by_type.items():
-            # Sort entity_ids to produce the same ordering on multiple runs
-            for entity_id in sorted(entity_ids):
-                metadata_file = self.metadata[entity_id]
-                json_content = metadata_file.content
-                key = str(EntityReference(entity_type=entity_type, entity_id=entity_id))
-                metadata[key] = json_content
-                if entity_type.endswith('_file'):
-                    file_manifest = self.descriptors[entity_id].manifest_entry
-                    manifest[key] = file_manifest
-                else:
-                    pass
-        return links_file.version, manifest, metadata, links_file.content
-
-    def _entity_ids_by_type(self,
-                            subgraph_id: str
-                            ) -> dict[str, set[str]]:
-        """
-        Return a mapping of entity types (e.g. 'analysis_file',
-        'cell_suspension') to a set of entity IDs.
-        """
-        links_file: LinksFile = self.links[subgraph_id]
-        links_json = links_file.content
-        entity_ids = defaultdict(set)
-        # Project ID is only mentioned in the links JSON if there is a
-        # supplementary_file_link, so add it here to make sure it is included.
-        entity_ids['project'].add(links_file.project_id)
-        link: JSON
-        for link in links_json['links']:
-            link_type = link['link_type']
-            if link_type == 'process_link':
-                entity_type = link['process_type']
-                entity_id = link['process_id']
-                entity_ids[entity_type].add(entity_id)
-                for category in ('input', 'output', 'protocol'):
-                    for file in link[f'{category}s']:
-                        entity_type = file[f'{category}_type']
-                        entity_id = file[f'{category}_id']
-                        entity_ids[entity_type].add(entity_id)
-            elif link_type == 'supplementary_file_link':
-                for file in link['files']:
-                    entity_type = file['file_type']
-                    entity_id = file['file_id']
-                    entity_ids[entity_type].add(entity_id)
-            else:
-                raise ValueError('Unknown link type', link_type)
-        entity_ids.default_factory = None
-        return entity_ids
-
-
-JSON_FILE = TypeVar('JSON_FILE', bound=JsonFile)
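To make _entity_ids_by_type concrete, here is a minimal hand-written links document of the shape the method traverses, and the grouping it derives; the IDs are shortened placeholders where real staging areas use UUIDs:

links_json = {
    'links': [{
        'link_type': 'process_link',
        'process_type': 'process',
        'process_id': 'proc-1',
        'inputs': [{'input_type': 'cell_suspension', 'input_id': 'cs-1'}],
        'outputs': [{'output_type': 'sequence_file', 'output_id': 'sf-1'}],
        'protocols': [{'protocol_type': 'library_preparation_protocol',
                       'protocol_id': 'prot-1'}]
    }]
}
# _entity_ids_by_type would return, with the project ID added unconditionally:
# {
#     'project': {'<project_id>'},
#     'process': {'proc-1'},
#     'cell_suspension': {'cs-1'},
#     'sequence_file': {'sf-1'},
#     'library_preparation_protocol': {'prot-1'},
# }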
-
-
-@attr.s(frozen=True, kw_only=True, auto_attribs=True)
-class CannedStagingAreaFactory:
-    #: Path to a local directory containing one or more staging areas
-    base_path: Path
-
-    @classmethod
-    def clone_remote(cls, remote_url: furl, local_path: Path, ref: str) -> Self:
-        """
-        Clone a remote Git repository and return a factory for staging areas
-        inside that clone.
-
-        :param remote_url: The URL of a remote Git repository containing one
-                           or more staging areas
-
-        :param local_path: The path to an empty local directory where the
-                           repository will be cloned
-
-        :param ref: A Git ref (branch, tag, or commit SHA)
-        """
-        log.debug('Cloning %s into %s', remote_url, local_path)
-        repo = git.Repo.clone_from(str(remote_url), local_path)
-        log.debug('Checking out ref %s', ref)
-        repo.git.checkout(ref)
-        return cls(base_path=local_path)
-
-    def load_staging_area(self, path: Path) -> StagingArea:
-        """
-        Create and return a staging area object from the files in a local
-        staging area.
-
-        :param path: The relative path from `self.base_path` to a local
-                     staging area
-        """
-        path = self.base_path / path
-        staging_area_folders = {p.name for p in path.iterdir()}
-        expected_folders = {'data', 'descriptors', 'links', 'metadata'}
-        require(expected_folders == staging_area_folders,
-                'Invalid staging area', path)
-        return StagingArea(links=self._get_link_files(path),
-                           metadata=self._get_metadata_files(path),
-                           descriptors=self._get_descriptor_files(path))
-
-    def _get_link_files(self, path: Path) -> dict[str, LinksFile]:
-        """
-        Return a mapping of file ID to file content for all the link files in
-        the staging area.
-        """
-        return self._get_files(path=path / 'links', file_cls=LinksFile)
-
-    def _get_metadata_files(self, path: Path) -> dict[str, MetadataFile]:
-        """
-        Return a mapping of file ID to file content for all the metadata files
-        in the staging area.
-        """
-        files = {}
-        for sub_dir in (path / 'metadata').iterdir():
-            assert sub_dir.is_dir()
-            files.update(self._get_files(path=sub_dir, file_cls=MetadataFile))
-        return files
-
-    def _get_descriptor_files(self, path: Path) -> dict[str, DescriptorFile]:
-        """
-        Return a mapping of file ID to file content for all the descriptor
-        files in the staging area.
-        """
-        files = {}
-        for sub_dir in (path / 'descriptors').iterdir():
-            assert sub_dir.is_dir()
-            files.update(self._get_files(path=sub_dir, file_cls=DescriptorFile))
-        return files
-
-    def _get_files(self,
-                   path: Path,
-                   file_cls: type[JSON_FILE]
-                   ) -> dict[str, JSON_FILE]:
-        """
-        Return a mapping of file ID to file content for all the files found in
-        the directory at the given path.
-        """
-        files = {}
-        log.debug('Reading files in %s', path)
-        for file in path.iterdir():
-            assert file.is_file()
-            with open(file, 'r') as f:
-                content = json.load(f)
-            file_name = file.name
-            json_file = JsonFile.from_json(file_name, content)
-            require(isinstance(json_file, file_cls), json_file)
-            self._add_file(files, json_file)
-        return files
-
-    def _add_file(self, files: dict[str, JSON_FILE], file: JSON_FILE) -> None:
-        """
-        Add `file` to `files`. If a file with the same ID already exists in
-        `files`, the file with the most recent version will be kept.
-        """
-        try:
-            existing_version = files[file.uuid].version
-        except KeyError:
-            files[file.uuid] = file
-        else:
-            reject(file.version == existing_version, file)
-            if file.version > existing_version:
-                files[file.uuid] = file
-            else:
-                log.debug('Discarding previous %s version of file %s',
-                          existing_version, file)
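Putting the staging-area helpers together, a hedged end-to-end sketch using the pre-deletion API; the repository URL, fixture path, and subgraph ID are made-up placeholders:

from pathlib import Path
from tempfile import TemporaryDirectory

from furl import furl

from humancellatlas.data.metadata.helpers.staging_area import (
    CannedStagingAreaFactory,
)

with TemporaryDirectory() as tmp:
    factory = CannedStagingAreaFactory.clone_remote(
        furl('https://github.com/example/staging-areas.git'),  # placeholder
        Path(tmp),
        'main')  # any branch, tag, or commit SHA
    staging_area = factory.load_staging_area(Path('areas/test-area'))  # placeholder
    bundle = staging_area.get_bundle('4da04038-adab-59a9-b6c4-3a61242cc972')  # placeholder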
diff --git a/src/humancellatlas/data/metadata/lookup.py b/src/humancellatlas/data/metadata/lookup.py
deleted file mode 100644
index 41fd21f679..0000000000
--- a/src/humancellatlas/data/metadata/lookup.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from enum import (
-    Enum,
-)
-from typing import (
-    Mapping,
-    TypeVar,
-)
-
-K = TypeVar('K')
-V = TypeVar('V')
-
-
-class LookupDefault(Enum):
-    RAISE = 0
-
-
-def lookup(d: Mapping[K, V],
-           k: K,
-           *ks: K,
-           default: V | LookupDefault | None = LookupDefault.RAISE
-           ) -> V:
-    """
-    Look up a value in the specified dictionary given one or more candidate
-    keys.
-
-    This function raises a KeyError for the first (!) key if none of the keys
-    are present and the `default` keyword argument is absent. If the `default`
-    keyword argument is present (None is a valid default), this function
-    returns that argument instead of raising a KeyError in that case. This is
-    notably different from dict.get(), whose default default is `None`. This
-    function does not have a default default.
-
-    If the first key is present, return its value ...
-    >>> lookup({1:2}, 1)
-    2
-
-    ... and ignore the other keys.
-    >>> lookup({1:2}, 1, 3)
-    2
-
-    If the first key is absent, try the fallbacks.
-    >>> lookup({1:2}, 3, 1)
-    2
-
-    If the key isn't present, raise a KeyError referring to that key.
-    >>> lookup({1:2}, 3)
-    Traceback (most recent call last):
-    ...
-    KeyError: 3
-
-    If neither the first key nor the fallbacks are present, raise a KeyError
-    referring to the first key.
-    >>> lookup({1:2}, 3, 4)
-    Traceback (most recent call last):
-    ...
-    KeyError: 3
-
-    If the key isn't present but a default was passed, return the default.
-    >>> lookup({1:2}, 3, default=4)
-    4
-
-    None is a valid default.
-    >>> lookup({1:2}, 3, 4, default=None) is None
-    True
-    """
-    try:
-        return d[k]
-    except KeyError:
-        for k in ks:
-            try:
-                return d[k]
-            except KeyError:
-                pass
-        if default is LookupDefault.RAISE:
-            raise
-        else:
-            return default
diff --git a/stubs/hca/dss/__init__.pyi b/stubs/hca/dss/__init__.pyi
deleted file mode 100644
index 219b75d783..0000000000
--- a/stubs/hca/dss/__init__.pyi
+++ /dev/null
@@ -1,20 +0,0 @@
-from hca.util import SwaggerClient
-
-
-class _Method:
-
-    def _request(self): ...
-
-    def __call__(self, *args, **kwargs): ...
-
-
-class DSSClient(SwaggerClient):
-    get_bundle = _Method()
-    get_file = _Method()
-    put_file = _Method()
-    put_bundle = _Method()
-    get_subscriptions = _Method()
-    put_subscription = _Method()
-    delete_subscription = _Method()
-    post_search = _Method()
-    get_bundles_all = _Method()
diff --git a/swagger/index.html b/swagger/index.html
deleted file mode 100644
index 84ae62d3da..0000000000
--- a/swagger/index.html
+++ /dev/null
@@ -1,19 +0,0 @@
-[19 lines of HTML markup were lost in extraction; only the page title "Swagger UI" survives]
diff --git a/swagger/oauth2-redirect.html b/swagger/oauth2-redirect.html
deleted file mode 100644
index 5640917181..0000000000
--- a/swagger/oauth2-redirect.html
+++ /dev/null
@@ -1,79 +0,0 @@
-[79 lines of HTML and inline script were lost in extraction; only the page title "Swagger UI: OAuth2 Redirect" survives]
diff --git a/swagger/oauth2-redirect.html.template.mustache b/swagger/oauth2-redirect.html.template.mustache
deleted file mode 100644
index fad71fcd78..0000000000
--- a/swagger/oauth2-redirect.html.template.mustache
+++ /dev/null
@@ -1,79 +0,0 @@
-[79 lines of HTML and inline script were lost in extraction; only the page title "Swagger UI: OAuth2 Redirect" survives]
diff --git a/swagger/swagger-initializer.js b/swagger/swagger-initializer.js
deleted file mode 100644
index 8ea0ea3afc..0000000000
--- a/swagger/swagger-initializer.js
+++ /dev/null
@@ -1,20 +0,0 @@
-window.onload = function() {
-  //
-
-  // the following lines will be replaced by docker/configurator, when it runs in a docker-container
-  window.ui = SwaggerUIBundle({
-    url: "https://petstore.swagger.io/v2/swagger.json",
-    dom_id: '#swagger-ui',
-    deepLinking: true,
-    presets: [
-      SwaggerUIBundle.presets.apis,
-      SwaggerUIStandalonePreset
-    ],
-    plugins: [
-      SwaggerUIBundle.plugins.DownloadUrl
-    ],
-    layout: "StandaloneLayout"
-  });
-
-  //
-};
diff --git a/swagger/swagger-initializer.js.template.mustache b/swagger/swagger-initializer.js.template.mustache
deleted file mode 100644
index b83e4b5207..0000000000
--- a/swagger/swagger-initializer.js.template.mustache
+++ /dev/null
@@ -1,41 +0,0 @@
-window.onload = function() {
-  //
-
-  // Adapted from https://github.com/swagger-api/swagger-ui/issues/3725#issuecomment-334899276
-  const DisableTryItOutPlugin = function() {
-    return {
-      statePlugins: {
-        spec: {
-          wrapSelectors: {
-            allowTryItOutFor: (oriSelector, system) => (state, ...args) => {
-              return oriSelector(state, ...args) && ({{{NON_INTERACTIVE_METHODS}}}.indexOf(args.join('/')) == -1);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  window.ui = SwaggerUIBundle({
-    url: {{{OPENAPI_SPEC}}},
-    dom_id: '#swagger-ui',
-    deepLinking: true,
-    presets: [
-      SwaggerUIBundle.presets.apis
-    ],
-    plugins: [
-      SwaggerUIBundle.plugins.DownloadUrl,
-      DisableTryItOutPlugin
-    ],
-    oauth2RedirectUrl: {{{OAUTH2_REDIRECT_URL}}}
-  });
-
-  const client_id = {{{OAUTH2_CLIENT_ID}}};
-  if (client_id !== null) {
-    window.ui.initOAuth({
-      clientId: client_id
-    })
-  }
-
-  //
-};
diff --git a/swagger/swagger-ui-bundle.js b/swagger/swagger-ui-bundle.js
deleted file mode 100644
index a080132459..0000000000
--- a/swagger/swagger-ui-bundle.js
+++ /dev/null
@@ -1,2 +0,0 @@
-/*! For license information please see swagger-ui-bundle.js.LICENSE.txt */
-[minified third-party webpack bundle elided; the file consisted of this license header and one very long minified line]
s+1}))})),a.asImmutable()}function groupByFactory(s,o,i){var a=isKeyed(s),u=(isOrdered(s)?OrderedMap():Map()).asMutable();s.__iterate((function(_,w){u.update(o.call(i,_,w,s),(function(s){return(s=s||[]).push(a?[w,_]:_),s}))}));var _=iterableClass(s);return u.map((function(o){return reify(s,_(o))}))}function sliceFactory(s,o,i,a){var u=s.size;if(void 0!==o&&(o|=0),void 0!==i&&(i===1/0?i=u:i|=0),wholeSlice(o,i,u))return s;var _=resolveBegin(o,u),w=resolveEnd(i,u);if(_!=_||w!=w)return sliceFactory(s.toSeq().cacheResult(),o,i,a);var x,C=w-_;C==C&&(x=C<0?0:C);var j=makeSequence(s);return j.size=0===x?x:s.size&&x||void 0,!a&&isSeq(s)&&x>=0&&(j.get=function(o,i){return(o=wrapIndex(this,o))>=0&&ox)return iteratorDone();var s=u.next();return a||o===V?s:iteratorValue(o,C-1,o===$?void 0:s.value[1],s)}))},j}function takeWhileFactory(s,o,i){var a=makeSequence(s);return a.__iterateUncached=function(a,u){var _=this;if(u)return this.cacheResult().__iterate(a,u);var w=0;return s.__iterate((function(s,u,x){return o.call(i,s,u,x)&&++w&&a(s,u,_)})),w},a.__iteratorUncached=function(a,u){var _=this;if(u)return this.cacheResult().__iterator(a,u);var w=s.__iterator(U,u),x=!0;return new Iterator((function(){if(!x)return iteratorDone();var s=w.next();if(s.done)return s;var u=s.value,C=u[0],j=u[1];return o.call(i,j,C,_)?a===U?s:iteratorValue(a,C,j,s):(x=!1,iteratorDone())}))},a}function skipWhileFactory(s,o,i,a){var u=makeSequence(s);return u.__iterateUncached=function(u,_){var w=this;if(_)return this.cacheResult().__iterate(u,_);var x=!0,C=0;return s.__iterate((function(s,_,j){if(!x||!(x=o.call(i,s,_,j)))return C++,u(s,a?_:C-1,w)})),C},u.__iteratorUncached=function(u,_){var w=this;if(_)return this.cacheResult().__iterator(u,_);var x=s.__iterator(U,_),C=!0,j=0;return new Iterator((function(){var s,_,L;do{if((s=x.next()).done)return a||u===V?s:iteratorValue(u,j++,u===$?void 0:s.value[1],s);var B=s.value;_=B[0],L=B[1],C&&(C=o.call(i,L,_,w))}while(C);return u===U?s:iteratorValue(u,_,L,s)}))},u}function concatFactory(s,o){var i=isKeyed(s),a=[s].concat(o).map((function(s){return isIterable(s)?i&&(s=KeyedIterable(s)):s=i?keyedSeqFromValue(s):indexedSeqFromValue(Array.isArray(s)?s:[s]),s})).filter((function(s){return 0!==s.size}));if(0===a.length)return s;if(1===a.length){var u=a[0];if(u===s||i&&isKeyed(u)||isIndexed(s)&&isIndexed(u))return u}var _=new ArraySeq(a);return i?_=_.toKeyedSeq():isIndexed(s)||(_=_.toSetSeq()),(_=_.flatten(!0)).size=a.reduce((function(s,o){if(void 0!==s){var i=o.size;if(void 0!==i)return s+i}}),0),_}function flattenFactory(s,o,i){var a=makeSequence(s);return a.__iterateUncached=function(a,u){var _=0,w=!1;function flatDeep(s,x){var C=this;s.__iterate((function(s,u){return(!o||x0}function zipWithFactory(s,o,i){var a=makeSequence(s);return a.size=new ArraySeq(i).map((function(s){return s.size})).min(),a.__iterate=function(s,o){for(var i,a=this.__iterator(V,o),u=0;!(i=a.next()).done&&!1!==s(i.value,u++,this););return u},a.__iteratorUncached=function(s,a){var u=i.map((function(s){return s=Iterable(s),getIterator(a?s.reverse():s)})),_=0,w=!1;return new Iterator((function(){var i;return w||(i=u.map((function(s){return s.next()})),w=i.some((function(s){return s.done}))),w?iteratorDone():iteratorValue(s,_++,o.apply(null,i.map((function(s){return s.value}))))}))},a}function reify(s,o){return isSeq(s)?o:s.constructor(o)}function validateEntry(s){if(s!==Object(s))throw new TypeError("Expected [K, V] tuple: "+s)}function resolveSize(s){return assertNotInfinite(s.size),ensureSize(s)}function 
iterableClass(s){return isKeyed(s)?KeyedIterable:isIndexed(s)?IndexedIterable:SetIterable}function makeSequence(s){return Object.create((isKeyed(s)?KeyedSeq:isIndexed(s)?IndexedSeq:SetSeq).prototype)}function cacheResultThrough(){return this._iter.cacheResult?(this._iter.cacheResult(),this.size=this._iter.size,this):Seq.prototype.cacheResult.call(this)}function defaultComparator(s,o){return s>o?1:s=0;i--)o={value:arguments[i],next:o};return this.__ownerID?(this.size=s,this._head=o,this.__hash=void 0,this.__altered=!0,this):makeStack(s,o)},Stack.prototype.pushAll=function(s){if(0===(s=IndexedIterable(s)).size)return this;assertNotInfinite(s.size);var o=this.size,i=this._head;return s.reverse().forEach((function(s){o++,i={value:s,next:i}})),this.__ownerID?(this.size=o,this._head=i,this.__hash=void 0,this.__altered=!0,this):makeStack(o,i)},Stack.prototype.pop=function(){return this.slice(1)},Stack.prototype.unshift=function(){return this.push.apply(this,arguments)},Stack.prototype.unshiftAll=function(s){return this.pushAll(s)},Stack.prototype.shift=function(){return this.pop.apply(this,arguments)},Stack.prototype.clear=function(){return 0===this.size?this:this.__ownerID?(this.size=0,this._head=void 0,this.__hash=void 0,this.__altered=!0,this):emptyStack()},Stack.prototype.slice=function(s,o){if(wholeSlice(s,o,this.size))return this;var i=resolveBegin(s,this.size);if(resolveEnd(o,this.size)!==this.size)return IndexedCollection.prototype.slice.call(this,s,o);for(var a=this.size-i,u=this._head;i--;)u=u.next;return this.__ownerID?(this.size=a,this._head=u,this.__hash=void 0,this.__altered=!0,this):makeStack(a,u)},Stack.prototype.__ensureOwner=function(s){return s===this.__ownerID?this:s?makeStack(this.size,this._head,s,this.__hash):(this.__ownerID=s,this.__altered=!1,this)},Stack.prototype.__iterate=function(s,o){if(o)return this.reverse().__iterate(s);for(var i=0,a=this._head;a&&!1!==s(a.value,i++,this);)a=a.next;return i},Stack.prototype.__iterator=function(s,o){if(o)return this.reverse().__iterator(s);var i=0,a=this._head;return new Iterator((function(){if(a){var o=a.value;return a=a.next,iteratorValue(s,i++,o)}return iteratorDone()}))},Stack.isStack=isStack;var at,ct="@@__IMMUTABLE_STACK__@@",lt=Stack.prototype;function makeStack(s,o,i,a){var u=Object.create(lt);return u.size=s,u._head=o,u.__ownerID=i,u.__hash=a,u.__altered=!1,u}function emptyStack(){return at||(at=makeStack(0))}function mixin(s,o){var keyCopier=function(i){s.prototype[i]=o[i]};return Object.keys(o).forEach(keyCopier),Object.getOwnPropertySymbols&&Object.getOwnPropertySymbols(o).forEach(keyCopier),s}lt[ct]=!0,lt.withMutations=$e.withMutations,lt.asMutable=$e.asMutable,lt.asImmutable=$e.asImmutable,lt.wasAltered=$e.wasAltered,Iterable.Iterator=Iterator,mixin(Iterable,{toArray:function(){assertNotInfinite(this.size);var s=new Array(this.size||0);return this.valueSeq().__iterate((function(o,i){s[i]=o})),s},toIndexedSeq:function(){return new ToIndexedSequence(this)},toJS:function(){return this.toSeq().map((function(s){return s&&"function"==typeof s.toJS?s.toJS():s})).__toJS()},toJSON:function(){return this.toSeq().map((function(s){return s&&"function"==typeof s.toJSON?s.toJSON():s})).__toJS()},toKeyedSeq:function(){return new ToKeyedSequence(this,!0)},toMap:function(){return Map(this.toKeyedSeq())},toObject:function(){assertNotInfinite(this.size);var s={};return this.__iterate((function(o,i){s[i]=o})),s},toOrderedMap:function(){return OrderedMap(this.toKeyedSeq())},toOrderedSet:function(){return 
OrderedSet(isKeyed(this)?this.valueSeq():this)},toSet:function(){return Set(isKeyed(this)?this.valueSeq():this)},toSetSeq:function(){return new ToSetSequence(this)},toSeq:function(){return isIndexed(this)?this.toIndexedSeq():isKeyed(this)?this.toKeyedSeq():this.toSetSeq()},toStack:function(){return Stack(isKeyed(this)?this.valueSeq():this)},toList:function(){return List(isKeyed(this)?this.valueSeq():this)},toString:function(){return"[Iterable]"},__toString:function(s,o){return 0===this.size?s+o:s+" "+this.toSeq().map(this.__toStringMapper).join(", ")+" "+o},concat:function(){return reify(this,concatFactory(this,s.call(arguments,0)))},includes:function(s){return this.some((function(o){return is(o,s)}))},entries:function(){return this.__iterator(U)},every:function(s,o){assertNotInfinite(this.size);var i=!0;return this.__iterate((function(a,u,_){if(!s.call(o,a,u,_))return i=!1,!1})),i},filter:function(s,o){return reify(this,filterFactory(this,s,o,!0))},find:function(s,o,i){var a=this.findEntry(s,o);return a?a[1]:i},forEach:function(s,o){return assertNotInfinite(this.size),this.__iterate(o?s.bind(o):s)},join:function(s){assertNotInfinite(this.size),s=void 0!==s?""+s:",";var o="",i=!0;return this.__iterate((function(a){i?i=!1:o+=s,o+=null!=a?a.toString():""})),o},keys:function(){return this.__iterator($)},map:function(s,o){return reify(this,mapFactory(this,s,o))},reduce:function(s,o,i){var a,u;return assertNotInfinite(this.size),arguments.length<2?u=!0:a=o,this.__iterate((function(o,_,w){u?(u=!1,a=o):a=s.call(i,a,o,_,w)})),a},reduceRight:function(s,o,i){var a=this.toKeyedSeq().reverse();return a.reduce.apply(a,arguments)},reverse:function(){return reify(this,reverseFactory(this,!0))},slice:function(s,o){return reify(this,sliceFactory(this,s,o,!0))},some:function(s,o){return!this.every(not(s),o)},sort:function(s){return reify(this,sortFactory(this,s))},values:function(){return this.__iterator(V)},butLast:function(){return this.slice(0,-1)},isEmpty:function(){return void 0!==this.size?0===this.size:!this.some((function(){return!0}))},count:function(s,o){return ensureSize(s?this.toSeq().filter(s,o):this)},countBy:function(s,o){return countByFactory(this,s,o)},equals:function(s){return deepEqual(this,s)},entrySeq:function(){var s=this;if(s._cache)return new ArraySeq(s._cache);var o=s.toSeq().map(entryMapper).toIndexedSeq();return o.fromEntrySeq=function(){return s.toSeq()},o},filterNot:function(s,o){return this.filter(not(s),o)},findEntry:function(s,o,i){var a=i;return this.__iterate((function(i,u,_){if(s.call(o,i,u,_))return a=[u,i],!1})),a},findKey:function(s,o){var i=this.findEntry(s,o);return i&&i[0]},findLast:function(s,o,i){return this.toKeyedSeq().reverse().find(s,o,i)},findLastEntry:function(s,o,i){return this.toKeyedSeq().reverse().findEntry(s,o,i)},findLastKey:function(s,o){return this.toKeyedSeq().reverse().findKey(s,o)},first:function(){return this.find(returnTrue)},flatMap:function(s,o){return reify(this,flatMapFactory(this,s,o))},flatten:function(s){return reify(this,flattenFactory(this,s,!0))},fromEntrySeq:function(){return new FromEntriesSequence(this)},get:function(s,o){return this.find((function(o,i){return is(i,s)}),void 0,o)},getIn:function(s,o){for(var i,a=this,u=forceIterator(s);!(i=u.next()).done;){var _=i.value;if((a=a&&a.get?a.get(_,j):j)===j)return o}return a},groupBy:function(s,o){return groupByFactory(this,s,o)},has:function(s){return this.get(s,j)!==j},hasIn:function(s){return this.getIn(s,j)!==j},isSubset:function(s){return s="function"==typeof 
s.includes?s:Iterable(s),this.every((function(o){return s.includes(o)}))},isSuperset:function(s){return(s="function"==typeof s.isSubset?s:Iterable(s)).isSubset(this)},keyOf:function(s){return this.findKey((function(o){return is(o,s)}))},keySeq:function(){return this.toSeq().map(keyMapper).toIndexedSeq()},last:function(){return this.toSeq().reverse().first()},lastKeyOf:function(s){return this.toKeyedSeq().reverse().keyOf(s)},max:function(s){return maxFactory(this,s)},maxBy:function(s,o){return maxFactory(this,o,s)},min:function(s){return maxFactory(this,s?neg(s):defaultNegComparator)},minBy:function(s,o){return maxFactory(this,o?neg(o):defaultNegComparator,s)},rest:function(){return this.slice(1)},skip:function(s){return this.slice(Math.max(0,s))},skipLast:function(s){return reify(this,this.toSeq().reverse().skip(s).reverse())},skipWhile:function(s,o){return reify(this,skipWhileFactory(this,s,o,!0))},skipUntil:function(s,o){return this.skipWhile(not(s),o)},sortBy:function(s,o){return reify(this,sortFactory(this,o,s))},take:function(s){return this.slice(0,Math.max(0,s))},takeLast:function(s){return reify(this,this.toSeq().reverse().take(s).reverse())},takeWhile:function(s,o){return reify(this,takeWhileFactory(this,s,o))},takeUntil:function(s,o){return this.takeWhile(not(s),o)},valueSeq:function(){return this.toIndexedSeq()},hashCode:function(){return this.__hash||(this.__hash=hashIterable(this))}});var ut=Iterable.prototype;ut[o]=!0,ut[Z]=ut.values,ut.__toJS=ut.toArray,ut.__toStringMapper=quoteString,ut.inspect=ut.toSource=function(){return this.toString()},ut.chain=ut.flatMap,ut.contains=ut.includes,mixin(KeyedIterable,{flip:function(){return reify(this,flipFactory(this))},mapEntries:function(s,o){var i=this,a=0;return reify(this,this.toSeq().map((function(u,_){return s.call(o,[_,u],a++,i)})).fromEntrySeq())},mapKeys:function(s,o){var i=this;return reify(this,this.toSeq().flip().map((function(a,u){return s.call(o,a,u,i)})).flip())}});var pt=KeyedIterable.prototype;function keyMapper(s,o){return o}function entryMapper(s,o){return[o,s]}function not(s){return function(){return!s.apply(this,arguments)}}function neg(s){return function(){return-s.apply(this,arguments)}}function quoteString(s){return"string"==typeof s?JSON.stringify(s):String(s)}function defaultZipper(){return arrCopy(arguments)}function defaultNegComparator(s,o){return so?-1:0}function hashIterable(s){if(s.size===1/0)return 0;var o=isOrdered(s),i=isKeyed(s),a=o?1:0;return murmurHashOfSize(s.__iterate(i?o?function(s,o){a=31*a+hashMerge(hash(s),hash(o))|0}:function(s,o){a=a+hashMerge(hash(s),hash(o))|0}:o?function(s){a=31*a+hash(s)|0}:function(s){a=a+hash(s)|0}),a)}function murmurHashOfSize(s,o){return o=le(o,3432918353),o=le(o<<15|o>>>-15,461845907),o=le(o<<13|o>>>-13,5),o=le((o=o+3864292196^s)^o>>>16,2246822507),o=smi((o=le(o^o>>>13,3266489909))^o>>>16)}function hashMerge(s,o){return s^o+2654435769+(s<<6)+(s>>2)}return pt[i]=!0,pt[Z]=ut.entries,pt.__toJS=ut.toObject,pt.__toStringMapper=function(s,o){return JSON.stringify(o)+": "+quoteString(s)},mixin(IndexedIterable,{toKeyedSeq:function(){return new ToKeyedSequence(this,!1)},filter:function(s,o){return reify(this,filterFactory(this,s,o,!1))},findIndex:function(s,o){var i=this.findEntry(s,o);return i?i[0]:-1},indexOf:function(s){var o=this.keyOf(s);return void 0===o?-1:o},lastIndexOf:function(s){var o=this.lastKeyOf(s);return void 0===o?-1:o},reverse:function(){return reify(this,reverseFactory(this,!1))},slice:function(s,o){return 
reify(this,sliceFactory(this,s,o,!1))},splice:function(s,o){var i=arguments.length;if(o=Math.max(0|o,0),0===i||2===i&&!o)return this;s=resolveBegin(s,s<0?this.count():this.size);var a=this.slice(0,s);return reify(this,1===i?a:a.concat(arrCopy(arguments,2),this.slice(s+o)))},findLastIndex:function(s,o){var i=this.findLastEntry(s,o);return i?i[0]:-1},first:function(){return this.get(0)},flatten:function(s){return reify(this,flattenFactory(this,s,!1))},get:function(s,o){return(s=wrapIndex(this,s))<0||this.size===1/0||void 0!==this.size&&s>this.size?o:this.find((function(o,i){return i===s}),void 0,o)},has:function(s){return(s=wrapIndex(this,s))>=0&&(void 0!==this.size?this.size===1/0||s{"use strict";i(71340);var a=i(92046);s.exports=a.Object.assign},9999:(s,o,i)=>{var a=i(37217),u=i(83729),_=i(16547),w=i(74733),x=i(43838),C=i(93290),j=i(23007),L=i(92271),B=i(48948),$=i(50002),V=i(83349),U=i(5861),z=i(76189),Y=i(77199),Z=i(35529),ee=i(56449),ie=i(3656),ae=i(87730),ce=i(23805),le=i(38440),pe=i(95950),de=i(37241),fe="[object Arguments]",ye="[object Function]",be="[object Object]",_e={};_e[fe]=_e["[object Array]"]=_e["[object ArrayBuffer]"]=_e["[object DataView]"]=_e["[object Boolean]"]=_e["[object Date]"]=_e["[object Float32Array]"]=_e["[object Float64Array]"]=_e["[object Int8Array]"]=_e["[object Int16Array]"]=_e["[object Int32Array]"]=_e["[object Map]"]=_e["[object Number]"]=_e[be]=_e["[object RegExp]"]=_e["[object Set]"]=_e["[object String]"]=_e["[object Symbol]"]=_e["[object Uint8Array]"]=_e["[object Uint8ClampedArray]"]=_e["[object Uint16Array]"]=_e["[object Uint32Array]"]=!0,_e["[object Error]"]=_e[ye]=_e["[object WeakMap]"]=!1,s.exports=function baseClone(s,o,i,Se,we,xe){var Pe,Te=1&o,Re=2&o,$e=4&o;if(i&&(Pe=we?i(s,Se,we,xe):i(s)),void 0!==Pe)return Pe;if(!ce(s))return s;var qe=ee(s);if(qe){if(Pe=z(s),!Te)return j(s,Pe)}else{var ze=U(s),We=ze==ye||"[object GeneratorFunction]"==ze;if(ie(s))return C(s,Te);if(ze==be||ze==fe||We&&!we){if(Pe=Re||We?{}:Z(s),!Te)return Re?B(s,x(Pe,s)):L(s,w(Pe,s))}else{if(!_e[ze])return we?s:{};Pe=Y(s,ze,Te)}}xe||(xe=new a);var He=xe.get(s);if(He)return He;xe.set(s,Pe),le(s)?s.forEach((function(a){Pe.add(baseClone(a,o,i,a,s,xe))})):ae(s)&&s.forEach((function(a,u){Pe.set(u,baseClone(a,o,i,u,s,xe))}));var Ye=qe?void 0:($e?Re?V:$:Re?de:pe)(s);return u(Ye||s,(function(a,u){Ye&&(a=s[u=a]),_(Pe,u,baseClone(a,o,i,u,s,xe))})),Pe}},10023:(s,o,i)=>{const a=i(6205),INTS=()=>[{type:a.RANGE,from:48,to:57}],WORDS=()=>[{type:a.CHAR,value:95},{type:a.RANGE,from:97,to:122},{type:a.RANGE,from:65,to:90}].concat(INTS()),WHITESPACE=()=>[{type:a.CHAR,value:9},{type:a.CHAR,value:10},{type:a.CHAR,value:11},{type:a.CHAR,value:12},{type:a.CHAR,value:13},{type:a.CHAR,value:32},{type:a.CHAR,value:160},{type:a.CHAR,value:5760},{type:a.RANGE,from:8192,to:8202},{type:a.CHAR,value:8232},{type:a.CHAR,value:8233},{type:a.CHAR,value:8239},{type:a.CHAR,value:8287},{type:a.CHAR,value:12288},{type:a.CHAR,value:65279}];o.words=()=>({type:a.SET,set:WORDS(),not:!1}),o.notWords=()=>({type:a.SET,set:WORDS(),not:!0}),o.ints=()=>({type:a.SET,set:INTS(),not:!1}),o.notInts=()=>({type:a.SET,set:INTS(),not:!0}),o.whitespace=()=>({type:a.SET,set:WHITESPACE(),not:!1}),o.notWhitespace=()=>({type:a.SET,set:WHITESPACE(),not:!0}),o.anyChar=()=>({type:a.SET,set:[{type:a.CHAR,value:10},{type:a.CHAR,value:13},{type:a.CHAR,value:8232},{type:a.CHAR,value:8233}],not:!0})},10043:(s,o,i)=>{"use strict";var a=i(54018),u=String,_=TypeError;s.exports=function(s){if(a(s))return s;throw new _("Can't set "+u(s)+" as a 
prototype")}},10124:(s,o,i)=>{var a=i(9325);s.exports=function(){return a.Date.now()}},10300:(s,o,i)=>{"use strict";var a=i(13930),u=i(82159),_=i(36624),w=i(4640),x=i(73448),C=TypeError;s.exports=function(s,o){var i=arguments.length<2?x(s):o;if(u(i))return _(a(i,s));throw new C(w(s)+" is not iterable")}},10316:(s,o,i)=>{const a=i(2404),u=i(55973),_=i(92340);class Element{constructor(s,o,i){o&&(this.meta=o),i&&(this.attributes=i),this.content=s}freeze(){Object.isFrozen(this)||(this._meta&&(this.meta.parent=this,this.meta.freeze()),this._attributes&&(this.attributes.parent=this,this.attributes.freeze()),this.children.forEach((s=>{s.parent=this,s.freeze()}),this),this.content&&Array.isArray(this.content)&&Object.freeze(this.content),Object.freeze(this))}primitive(){}clone(){const s=new this.constructor;return s.element=this.element,this.meta.length&&(s._meta=this.meta.clone()),this.attributes.length&&(s._attributes=this.attributes.clone()),this.content?this.content.clone?s.content=this.content.clone():Array.isArray(this.content)?s.content=this.content.map((s=>s.clone())):s.content=this.content:s.content=this.content,s}toValue(){return this.content instanceof Element?this.content.toValue():this.content instanceof u?{key:this.content.key.toValue(),value:this.content.value?this.content.value.toValue():void 0}:this.content&&this.content.map?this.content.map((s=>s.toValue()),this):this.content}toRef(s){if(""===this.id.toValue())throw Error("Cannot create reference to an element that does not contain an ID");const o=new this.RefElement(this.id.toValue());return s&&(o.path=s),o}findRecursive(...s){if(arguments.length>1&&!this.isFrozen)throw new Error("Cannot find recursive with multiple element names without first freezing the element. Call `element.freeze()`");const o=s.pop();let i=new _;const append=(s,o)=>(s.push(o),s),checkElement=(s,i)=>{i.element===o&&s.push(i);const a=i.findRecursive(o);return a&&a.reduce(append,s),i.content instanceof u&&(i.content.key&&checkElement(s,i.content.key),i.content.value&&checkElement(s,i.content.value)),s};return this.content&&(this.content.element&&checkElement(i,this.content),Array.isArray(this.content)&&this.content.reduce(checkElement,i)),s.isEmpty||(i=i.filter((o=>{let i=o.parents.map((s=>s.element));for(const o in s){const a=s[o],u=i.indexOf(a);if(-1===u)return!1;i=i.splice(0,u)}return!0}))),i}set(s){return this.content=s,this}equals(s){return a(this.toValue(),s)}getMetaProperty(s,o){if(!this.meta.hasKey(s)){if(this.isFrozen){const s=this.refract(o);return s.freeze(),s}this.meta.set(s,o)}return this.meta.get(s)}setMetaProperty(s,o){this.meta.set(s,o)}get element(){return this._storedElement||"element"}set element(s){this._storedElement=s}get content(){return this._content}set content(s){if(s instanceof Element)this._content=s;else if(s instanceof _)this.content=s.elements;else if("string"==typeof s||"number"==typeof s||"boolean"==typeof s||"null"===s||null==s)this._content=s;else if(s instanceof u)this._content=s;else if(Array.isArray(s))this._content=s.map(this.refract);else{if("object"!=typeof s)throw new Error("Cannot set content to given value");this._content=Object.keys(s).map((o=>new this.MemberElement(o,s[o])))}}get meta(){if(!this._meta){if(this.isFrozen){const s=new this.ObjectElement;return s.freeze(),s}this._meta=new this.ObjectElement}return this._meta}set meta(s){s instanceof this.ObjectElement?this._meta=s:this.meta.set(s||{})}get attributes(){if(!this._attributes){if(this.isFrozen){const s=new this.ObjectElement;return 
s.freeze(),s}this._attributes=new this.ObjectElement}return this._attributes}set attributes(s){s instanceof this.ObjectElement?this._attributes=s:this.attributes.set(s||{})}get id(){return this.getMetaProperty("id","")}set id(s){this.setMetaProperty("id",s)}get classes(){return this.getMetaProperty("classes",[])}set classes(s){this.setMetaProperty("classes",s)}get title(){return this.getMetaProperty("title","")}set title(s){this.setMetaProperty("title",s)}get description(){return this.getMetaProperty("description","")}set description(s){this.setMetaProperty("description",s)}get links(){return this.getMetaProperty("links",[])}set links(s){this.setMetaProperty("links",s)}get isFrozen(){return Object.isFrozen(this)}get parents(){let{parent:s}=this;const o=new _;for(;s;)o.push(s),s=s.parent;return o}get children(){if(Array.isArray(this.content))return new _(this.content);if(this.content instanceof u){const s=new _([this.content.key]);return this.content.value&&s.push(this.content.value),s}return this.content instanceof Element?new _([this.content]):new _}get recursiveChildren(){const s=new _;return this.children.forEach((o=>{s.push(o),o.recursiveChildren.forEach((o=>{s.push(o)}))})),s}}s.exports=Element},10392:s=>{s.exports=function getValue(s,o){return null==s?void 0:s[o]}},10776:(s,o,i)=>{var a=i(30756),u=i(95950);s.exports=function getMatchData(s){for(var o=u(s),i=o.length;i--;){var _=o[i],w=s[_];o[i]=[_,w,a(w)]}return o}},10866:(s,o,i)=>{const a=i(6048),u=i(92340);class ObjectSlice extends u{map(s,o){return this.elements.map((i=>s.bind(o)(i.value,i.key,i)))}filter(s,o){return new ObjectSlice(this.elements.filter((i=>s.bind(o)(i.value,i.key,i))))}reject(s,o){return this.filter(a(s.bind(o)))}forEach(s,o){return this.elements.forEach(((i,a)=>{s.bind(o)(i.value,i.key,i,a)}))}keys(){return this.map(((s,o)=>o.toValue()))}values(){return this.map((s=>s.toValue()))}}s.exports=ObjectSlice},11042:(s,o,i)=>{"use strict";var a=i(85582),u=i(1907),_=i(24443),w=i(87170),x=i(36624),C=u([].concat);s.exports=a("Reflect","ownKeys")||function ownKeys(s){var o=_.f(x(s)),i=w.f;return i?C(o,i(s)):o}},11091:(s,o,i)=>{"use strict";var a=i(45951),u=i(76024),_=i(92361),w=i(62250),x=i(13846).f,C=i(7463),j=i(92046),L=i(28311),B=i(61626),$=i(49724);i(36128);var wrapConstructor=function(s){var Wrapper=function(o,i,a){if(this instanceof Wrapper){switch(arguments.length){case 0:return new s;case 1:return new s(o);case 2:return new s(o,i)}return new s(o,i,a)}return u(s,this,arguments)};return Wrapper.prototype=s.prototype,Wrapper};s.exports=function(s,o){var i,u,V,U,z,Y,Z,ee,ie,ae=s.target,ce=s.global,le=s.stat,pe=s.proto,de=ce?a:le?a[ae]:a[ae]&&a[ae].prototype,fe=ce?j:j[ae]||B(j,ae,{})[ae],ye=fe.prototype;for(U in o)u=!(i=C(ce?U:ae+(le?".":"#")+U,s.forced))&&de&&$(de,U),Y=fe[U],u&&(Z=s.dontCallGetSet?(ie=x(de,U))&&ie.value:de[U]),z=u&&Z?Z:o[U],(i||pe||typeof Y!=typeof z)&&(ee=s.bind&&u?L(z,a):s.wrap&&u?wrapConstructor(z):pe&&w(z)?_(z):z,(s.sham||z&&z.sham||Y&&Y.sham)&&B(ee,"sham",!0),B(fe,U,ee),pe&&($(j,V=ae+"Prototype")||B(j,V,{}),B(j[V],U,z),s.real&&ye&&(i||!ye[U])&&B(ye,U,z)))}},11287:s=>{s.exports=function getHolder(s){return s.placeholder}},11331:(s,o,i)=>{var a=i(72552),u=i(28879),_=i(40346),w=Function.prototype,x=Object.prototype,C=w.toString,j=x.hasOwnProperty,L=C.call(Object);s.exports=function isPlainObject(s){if(!_(s)||"[object Object]"!=a(s))return!1;var o=u(s);if(null===o)return!0;var i=j.call(o,"constructor")&&o.constructor;return"function"==typeof i&&i instanceof i&&C.call(i)==L}},11470:(s,o,i)=>{"use 
strict";var a=i(1907),u=i(65482),_=i(90160),w=i(74239),x=a("".charAt),C=a("".charCodeAt),j=a("".slice),createMethod=function(s){return function(o,i){var a,L,B=_(w(o)),$=u(i),V=B.length;return $<0||$>=V?s?"":void 0:(a=C(B,$))<55296||a>56319||$+1===V||(L=C(B,$+1))<56320||L>57343?s?x(B,$):a:s?j(B,$,$+2):L-56320+(a-55296<<10)+65536}};s.exports={codeAt:createMethod(!1),charAt:createMethod(!0)}},11842:(s,o,i)=>{var a=i(82819),u=i(9325);s.exports=function createBind(s,o,i){var _=1&o,w=a(s);return function wrapper(){return(this&&this!==u&&this instanceof wrapper?w:s).apply(_?i:this,arguments)}}},12242:(s,o,i)=>{const a=i(10316);s.exports=class BooleanElement extends a{constructor(s,o,i){super(s,o,i),this.element="boolean"}primitive(){return"boolean"}}},12507:(s,o,i)=>{var a=i(28754),u=i(49698),_=i(63912),w=i(13222);s.exports=function createCaseFirst(s){return function(o){o=w(o);var i=u(o)?_(o):void 0,x=i?i[0]:o.charAt(0),C=i?a(i,1).join(""):o.slice(1);return x[s]()+C}}},12560:(s,o,i)=>{"use strict";i(99363);var a=i(19287),u=i(45951),_=i(14840),w=i(93742);for(var x in a)_(u[x],x),w[x]=w.Array},12651:(s,o,i)=>{var a=i(74218);s.exports=function getMapData(s,o){var i=s.__data__;return a(o)?i["string"==typeof o?"string":"hash"]:i.map}},12749:(s,o,i)=>{var a=i(81042),u=Object.prototype.hasOwnProperty;s.exports=function hashHas(s){var o=this.__data__;return a?void 0!==o[s]:u.call(o,s)}},13222:(s,o,i)=>{var a=i(77556);s.exports=function toString(s){return null==s?"":a(s)}},13846:(s,o,i)=>{"use strict";var a=i(39447),u=i(13930),_=i(22574),w=i(75817),x=i(4993),C=i(70470),j=i(49724),L=i(73648),B=Object.getOwnPropertyDescriptor;o.f=a?B:function getOwnPropertyDescriptor(s,o){if(s=x(s),o=C(o),L)try{return B(s,o)}catch(s){}if(j(s,o))return w(!u(_.f,s,o),s[o])}},13930:(s,o,i)=>{"use strict";var a=i(41505),u=Function.prototype.call;s.exports=a?u.bind(u):function(){return u.apply(u,arguments)}},14248:s=>{s.exports=function arraySome(s,o){for(var i=-1,a=null==s?0:s.length;++i{s.exports=function arrayPush(s,o){for(var i=-1,a=o.length,u=s.length;++i{const a=i(10316);s.exports=class RefElement extends a{constructor(s,o,i){super(s||[],o,i),this.element="ref",this.path||(this.path="element")}get path(){return this.attributes.get("path")}set path(s){this.attributes.set("path",s)}}},14744:s=>{"use strict";var o=function isMergeableObject(s){return function isNonNullObject(s){return!!s&&"object"==typeof s}(s)&&!function isSpecial(s){var o=Object.prototype.toString.call(s);return"[object RegExp]"===o||"[object Date]"===o||function isReactElement(s){return s.$$typeof===i}(s)}(s)};var i="function"==typeof Symbol&&Symbol.for?Symbol.for("react.element"):60103;function cloneUnlessOtherwiseSpecified(s,o){return!1!==o.clone&&o.isMergeableObject(s)?deepmerge(function emptyTarget(s){return Array.isArray(s)?[]:{}}(s),s,o):s}function defaultArrayMerge(s,o,i){return s.concat(o).map((function(s){return cloneUnlessOtherwiseSpecified(s,i)}))}function getKeys(s){return Object.keys(s).concat(function getEnumerableOwnPropertySymbols(s){return Object.getOwnPropertySymbols?Object.getOwnPropertySymbols(s).filter((function(o){return Object.propertyIsEnumerable.call(s,o)})):[]}(s))}function propertyIsOnObject(s,o){try{return o in s}catch(s){return!1}}function mergeObject(s,o,i){var a={};return i.isMergeableObject(s)&&getKeys(s).forEach((function(o){a[o]=cloneUnlessOtherwiseSpecified(s[o],i)})),getKeys(o).forEach((function(u){(function propertyIsUnsafe(s,o){return 
propertyIsOnObject(s,o)&&!(Object.hasOwnProperty.call(s,o)&&Object.propertyIsEnumerable.call(s,o))})(s,u)||(propertyIsOnObject(s,u)&&i.isMergeableObject(o[u])?a[u]=function getMergeFunction(s,o){if(!o.customMerge)return deepmerge;var i=o.customMerge(s);return"function"==typeof i?i:deepmerge}(u,i)(s[u],o[u],i):a[u]=cloneUnlessOtherwiseSpecified(o[u],i))})),a}function deepmerge(s,i,a){(a=a||{}).arrayMerge=a.arrayMerge||defaultArrayMerge,a.isMergeableObject=a.isMergeableObject||o,a.cloneUnlessOtherwiseSpecified=cloneUnlessOtherwiseSpecified;var u=Array.isArray(i);return u===Array.isArray(s)?u?a.arrayMerge(s,i,a):mergeObject(s,i,a):cloneUnlessOtherwiseSpecified(i,a)}deepmerge.all=function deepmergeAll(s,o){if(!Array.isArray(s))throw new Error("first argument should be an array");return s.reduce((function(s,i){return deepmerge(s,i,o)}),{})};var a=deepmerge;s.exports=a},14792:(s,o,i)=>{var a=i(13222),u=i(55808);s.exports=function capitalize(s){return u(a(s).toLowerCase())}},14840:(s,o,i)=>{"use strict";var a=i(52623),u=i(74284).f,_=i(61626),w=i(49724),x=i(54878),C=i(76264)("toStringTag");s.exports=function(s,o,i,j){var L=i?s:s&&s.prototype;L&&(w(L,C)||u(L,C,{configurable:!0,value:o}),j&&!a&&_(L,"toString",x))}},14974:s=>{s.exports=function safeGet(s,o){if(("constructor"!==o||"function"!=typeof s[o])&&"__proto__"!=o)return s[o]}},15287:(s,o)=>{"use strict";var i=Symbol.for("react.element"),a=Symbol.for("react.portal"),u=Symbol.for("react.fragment"),_=Symbol.for("react.strict_mode"),w=Symbol.for("react.profiler"),x=Symbol.for("react.provider"),C=Symbol.for("react.context"),j=Symbol.for("react.forward_ref"),L=Symbol.for("react.suspense"),B=Symbol.for("react.memo"),$=Symbol.for("react.lazy"),V=Symbol.iterator;var U={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},z=Object.assign,Y={};function E(s,o,i){this.props=s,this.context=o,this.refs=Y,this.updater=i||U}function F(){}function G(s,o,i){this.props=s,this.context=o,this.refs=Y,this.updater=i||U}E.prototype.isReactComponent={},E.prototype.setState=function(s,o){if("object"!=typeof s&&"function"!=typeof s&&null!=s)throw Error("setState(...): takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,s,o,"setState")},E.prototype.forceUpdate=function(s){this.updater.enqueueForceUpdate(this,s,"forceUpdate")},F.prototype=E.prototype;var Z=G.prototype=new F;Z.constructor=G,z(Z,E.prototype),Z.isPureReactComponent=!0;var ee=Array.isArray,ie=Object.prototype.hasOwnProperty,ae={current:null},ce={key:!0,ref:!0,__self:!0,__source:!0};function M(s,o,a){var u,_={},w=null,x=null;if(null!=o)for(u in void 0!==o.ref&&(x=o.ref),void 0!==o.key&&(w=""+o.key),o)ie.call(o,u)&&!ce.hasOwnProperty(u)&&(_[u]=o[u]);var C=arguments.length-2;if(1===C)_.children=a;else if(1{var a=i(96131);s.exports=function arrayIncludes(s,o){return!!(null==s?0:s.length)&&a(s,o,0)>-1}},15340:()=>{},15389:(s,o,i)=>{var a=i(93663),u=i(87978),_=i(83488),w=i(56449),x=i(50583);s.exports=function baseIteratee(s){return"function"==typeof s?s:null==s?_:"object"==typeof s?w(s)?u(s[0],s[1]):a(s):x(s)}},15972:(s,o,i)=>{"use strict";var a=i(49724),u=i(62250),_=i(39298),w=i(92522),x=i(57382),C=w("IE_PROTO"),j=Object,L=j.prototype;s.exports=x?j.getPrototypeOf:function(s){var o=_(s);if(a(o,C))return o[C];var i=o.constructor;return u(i)&&o instanceof i?i.prototype:o instanceof j?L:null}},16038:(s,o,i)=>{var a=i(5861),u=i(40346);s.exports=function 
baseIsSet(s){return u(s)&&"[object Set]"==a(s)}},16426:s=>{s.exports=function(){var s=document.getSelection();if(!s.rangeCount)return function(){};for(var o=document.activeElement,i=[],a=0;a{var a=i(43360),u=i(75288),_=Object.prototype.hasOwnProperty;s.exports=function assignValue(s,o,i){var w=s[o];_.call(s,o)&&u(w,i)&&(void 0!==i||o in s)||a(s,o,i)}},16708:(s,o,i)=>{"use strict";var a,u=i(65606);function CorkedRequest(s){var o=this;this.next=null,this.entry=null,this.finish=function(){!function onCorkedFinish(s,o,i){var a=s.entry;s.entry=null;for(;a;){var u=a.callback;o.pendingcb--,u(i),a=a.next}o.corkedRequestsFree.next=s}(o,s)}}s.exports=Writable,Writable.WritableState=WritableState;var _={deprecate:i(94643)},w=i(40345),x=i(48287).Buffer,C=(void 0!==i.g?i.g:"undefined"!=typeof window?window:"undefined"!=typeof self?self:{}).Uint8Array||function(){};var j,L=i(75896),B=i(65291).getHighWaterMark,$=i(86048).F,V=$.ERR_INVALID_ARG_TYPE,U=$.ERR_METHOD_NOT_IMPLEMENTED,z=$.ERR_MULTIPLE_CALLBACK,Y=$.ERR_STREAM_CANNOT_PIPE,Z=$.ERR_STREAM_DESTROYED,ee=$.ERR_STREAM_NULL_VALUES,ie=$.ERR_STREAM_WRITE_AFTER_END,ae=$.ERR_UNKNOWN_ENCODING,ce=L.errorOrDestroy;function nop(){}function WritableState(s,o,_){a=a||i(25382),s=s||{},"boolean"!=typeof _&&(_=o instanceof a),this.objectMode=!!s.objectMode,_&&(this.objectMode=this.objectMode||!!s.writableObjectMode),this.highWaterMark=B(this,s,"writableHighWaterMark",_),this.finalCalled=!1,this.needDrain=!1,this.ending=!1,this.ended=!1,this.finished=!1,this.destroyed=!1;var w=!1===s.decodeStrings;this.decodeStrings=!w,this.defaultEncoding=s.defaultEncoding||"utf8",this.length=0,this.writing=!1,this.corked=0,this.sync=!0,this.bufferProcessing=!1,this.onwrite=function(s){!function onwrite(s,o){var i=s._writableState,a=i.sync,_=i.writecb;if("function"!=typeof _)throw new z;if(function onwriteStateUpdate(s){s.writing=!1,s.writecb=null,s.length-=s.writelen,s.writelen=0}(i),o)!function onwriteError(s,o,i,a,_){--o.pendingcb,i?(u.nextTick(_,a),u.nextTick(finishMaybe,s,o),s._writableState.errorEmitted=!0,ce(s,a)):(_(a),s._writableState.errorEmitted=!0,ce(s,a),finishMaybe(s,o))}(s,i,a,o,_);else{var w=needFinish(i)||s.destroyed;w||i.corked||i.bufferProcessing||!i.bufferedRequest||clearBuffer(s,i),a?u.nextTick(afterWrite,s,i,w,_):afterWrite(s,i,w,_)}}(o,s)},this.writecb=null,this.writelen=0,this.bufferedRequest=null,this.lastBufferedRequest=null,this.pendingcb=0,this.prefinished=!1,this.errorEmitted=!1,this.emitClose=!1!==s.emitClose,this.autoDestroy=!!s.autoDestroy,this.bufferedRequestCount=0,this.corkedRequestsFree=new CorkedRequest(this)}function Writable(s){var o=this instanceof(a=a||i(25382));if(!o&&!j.call(Writable,this))return new Writable(s);this._writableState=new WritableState(s,this,o),this.writable=!0,s&&("function"==typeof s.write&&(this._write=s.write),"function"==typeof s.writev&&(this._writev=s.writev),"function"==typeof s.destroy&&(this._destroy=s.destroy),"function"==typeof s.final&&(this._final=s.final)),w.call(this)}function doWrite(s,o,i,a,u,_,w){o.writelen=a,o.writecb=w,o.writing=!0,o.sync=!0,o.destroyed?o.onwrite(new Z("write")):i?s._writev(u,o.onwrite):s._write(u,_,o.onwrite),o.sync=!1}function afterWrite(s,o,i,a){i||function onwriteDrain(s,o){0===o.length&&o.needDrain&&(o.needDrain=!1,s.emit("drain"))}(s,o),o.pendingcb--,a(),finishMaybe(s,o)}function clearBuffer(s,o){o.bufferProcessing=!0;var i=o.bufferedRequest;if(s._writev&&i&&i.next){var a=o.bufferedRequestCount,u=new Array(a),_=o.corkedRequestsFree;_.entry=i;for(var 
w=0,x=!0;i;)u[w]=i,i.isBuf||(x=!1),i=i.next,w+=1;u.allBuffers=x,doWrite(s,o,!0,o.length,u,"",_.finish),o.pendingcb++,o.lastBufferedRequest=null,_.next?(o.corkedRequestsFree=_.next,_.next=null):o.corkedRequestsFree=new CorkedRequest(o),o.bufferedRequestCount=0}else{for(;i;){var C=i.chunk,j=i.encoding,L=i.callback;if(doWrite(s,o,!1,o.objectMode?1:C.length,C,j,L),i=i.next,o.bufferedRequestCount--,o.writing)break}null===i&&(o.lastBufferedRequest=null)}o.bufferedRequest=i,o.bufferProcessing=!1}function needFinish(s){return s.ending&&0===s.length&&null===s.bufferedRequest&&!s.finished&&!s.writing}function callFinal(s,o){s._final((function(i){o.pendingcb--,i&&ce(s,i),o.prefinished=!0,s.emit("prefinish"),finishMaybe(s,o)}))}function finishMaybe(s,o){var i=needFinish(o);if(i&&(function prefinish(s,o){o.prefinished||o.finalCalled||("function"!=typeof s._final||o.destroyed?(o.prefinished=!0,s.emit("prefinish")):(o.pendingcb++,o.finalCalled=!0,u.nextTick(callFinal,s,o)))}(s,o),0===o.pendingcb&&(o.finished=!0,s.emit("finish"),o.autoDestroy))){var a=s._readableState;(!a||a.autoDestroy&&a.endEmitted)&&s.destroy()}return i}i(56698)(Writable,w),WritableState.prototype.getBuffer=function getBuffer(){for(var s=this.bufferedRequest,o=[];s;)o.push(s),s=s.next;return o},function(){try{Object.defineProperty(WritableState.prototype,"buffer",{get:_.deprecate((function writableStateBufferGetter(){return this.getBuffer()}),"_writableState.buffer is deprecated. Use _writableState.getBuffer instead.","DEP0003")})}catch(s){}}(),"function"==typeof Symbol&&Symbol.hasInstance&&"function"==typeof Function.prototype[Symbol.hasInstance]?(j=Function.prototype[Symbol.hasInstance],Object.defineProperty(Writable,Symbol.hasInstance,{value:function value(s){return!!j.call(this,s)||this===Writable&&(s&&s._writableState instanceof WritableState)}})):j=function realHasInstance(s){return s instanceof this},Writable.prototype.pipe=function(){ce(this,new Y)},Writable.prototype.write=function(s,o,i){var a=this._writableState,_=!1,w=!a.objectMode&&function _isUint8Array(s){return x.isBuffer(s)||s instanceof C}(s);return w&&!x.isBuffer(s)&&(s=function _uint8ArrayToBuffer(s){return x.from(s)}(s)),"function"==typeof o&&(i=o,o=null),w?o="buffer":o||(o=a.defaultEncoding),"function"!=typeof i&&(i=nop),a.ending?function writeAfterEnd(s,o){var i=new ie;ce(s,i),u.nextTick(o,i)}(this,i):(w||function validChunk(s,o,i,a){var _;return null===i?_=new ee:"string"==typeof i||o.objectMode||(_=new V("chunk",["string","Buffer"],i)),!_||(ce(s,_),u.nextTick(a,_),!1)}(this,a,s,i))&&(a.pendingcb++,_=function writeOrBuffer(s,o,i,a,u,_){if(!i){var w=function decodeChunk(s,o,i){s.objectMode||!1===s.decodeStrings||"string"!=typeof o||(o=x.from(o,i));return o}(o,a,u);a!==w&&(i=!0,u="buffer",a=w)}var C=o.objectMode?1:a.length;o.length+=C;var j=o.length-1))throw new ae(s);return this._writableState.defaultEncoding=s,this},Object.defineProperty(Writable.prototype,"writableBuffer",{enumerable:!1,get:function get(){return this._writableState&&this._writableState.getBuffer()}}),Object.defineProperty(Writable.prototype,"writableHighWaterMark",{enumerable:!1,get:function get(){return this._writableState.highWaterMark}}),Writable.prototype._write=function(s,o,i){i(new U("_write()"))},Writable.prototype._writev=null,Writable.prototype.end=function(s,o,i){var a=this._writableState;return"function"==typeof s?(i=s,s=null,o=null):"function"==typeof o&&(i=o,o=null),null!=s&&this.write(s,o),a.corked&&(a.corked=1,this.uncork()),a.ending||function 
endWritable(s,o,i){o.ending=!0,finishMaybe(s,o),i&&(o.finished?u.nextTick(i):s.once("finish",i));o.ended=!0,s.writable=!1}(this,a,i),this},Object.defineProperty(Writable.prototype,"writableLength",{enumerable:!1,get:function get(){return this._writableState.length}}),Object.defineProperty(Writable.prototype,"destroyed",{enumerable:!1,get:function get(){return void 0!==this._writableState&&this._writableState.destroyed},set:function set(s){this._writableState&&(this._writableState.destroyed=s)}}),Writable.prototype.destroy=L.destroy,Writable.prototype._undestroy=L.undestroy,Writable.prototype._destroy=function(s,o){o(s)}},16946:(s,o,i)=>{"use strict";var a=i(1907),u=i(98828),_=i(45807),w=Object,x=a("".split);s.exports=u((function(){return!w("z").propertyIsEnumerable(0)}))?function(s){return"String"===_(s)?x(s,""):w(s)}:w},16962:(s,o)=>{o.aliasToReal={each:"forEach",eachRight:"forEachRight",entries:"toPairs",entriesIn:"toPairsIn",extend:"assignIn",extendAll:"assignInAll",extendAllWith:"assignInAllWith",extendWith:"assignInWith",first:"head",conforms:"conformsTo",matches:"isMatch",property:"get",__:"placeholder",F:"stubFalse",T:"stubTrue",all:"every",allPass:"overEvery",always:"constant",any:"some",anyPass:"overSome",apply:"spread",assoc:"set",assocPath:"set",complement:"negate",compose:"flowRight",contains:"includes",dissoc:"unset",dissocPath:"unset",dropLast:"dropRight",dropLastWhile:"dropRightWhile",equals:"isEqual",identical:"eq",indexBy:"keyBy",init:"initial",invertObj:"invert",juxt:"over",omitAll:"omit",nAry:"ary",path:"get",pathEq:"matchesProperty",pathOr:"getOr",paths:"at",pickAll:"pick",pipe:"flow",pluck:"map",prop:"get",propEq:"matchesProperty",propOr:"getOr",props:"at",symmetricDifference:"xor",symmetricDifferenceBy:"xorBy",symmetricDifferenceWith:"xorWith",takeLast:"takeRight",takeLastWhile:"takeRightWhile",unapply:"rest",unnest:"flatten",useWith:"overArgs",where:"conformsTo",whereEq:"isMatch",zipObj:"zipObject"},o.aryMethod={1:["assignAll","assignInAll","attempt","castArray","ceil","create","curry","curryRight","defaultsAll","defaultsDeepAll","floor","flow","flowRight","fromPairs","invert","iteratee","memoize","method","mergeAll","methodOf","mixin","nthArg","over","overEvery","overSome","rest","reverse","round","runInContext","spread","template","trim","trimEnd","trimStart","uniqueId","words","zipAll"],2:["add","after","ary","assign","assignAllWith","assignIn","assignInAllWith","at","before","bind","bindAll","bindKey","chunk","cloneDeepWith","cloneWith","concat","conformsTo","countBy","curryN","curryRightN","debounce","defaults","defaultsDeep","defaultTo","delay","difference","divide","drop","dropRight","dropRightWhile","dropWhile","endsWith","eq","every","filter","find","findIndex","findKey","findLast","findLastIndex","findLastKey","flatMap","flatMapDeep","flattenDepth","forEach","forEachRight","forIn","forInRight","forOwn","forOwnRight","get","groupBy","gt","gte","has","hasIn","includes","indexOf","intersection","invertBy","invoke","invokeMap","isEqual","isMatch","join","keyBy","lastIndexOf","lt","lte","map","mapKeys","mapValues","matchesProperty","maxBy","meanBy","merge","mergeAllWith","minBy","multiply","nth","omit","omitBy","overArgs","pad","padEnd","padStart","parseInt","partial","partialRight","partition","pick","pickBy","propertyOf","pull","pullAll","pullAt","random","range","rangeRight","rearg","reject","remove","repeat","restFrom","result","sampleSize","some","sortBy","sortedIndex","sortedIndexOf","sortedLastIndex","sortedLastIndexOf","sortedUniqBy","split","spreadFrom",
"startsWith","subtract","sumBy","take","takeRight","takeRightWhile","takeWhile","tap","throttle","thru","times","trimChars","trimCharsEnd","trimCharsStart","truncate","union","uniqBy","uniqWith","unset","unzipWith","without","wrap","xor","zip","zipObject","zipObjectDeep"],3:["assignInWith","assignWith","clamp","differenceBy","differenceWith","findFrom","findIndexFrom","findLastFrom","findLastIndexFrom","getOr","includesFrom","indexOfFrom","inRange","intersectionBy","intersectionWith","invokeArgs","invokeArgsMap","isEqualWith","isMatchWith","flatMapDepth","lastIndexOfFrom","mergeWith","orderBy","padChars","padCharsEnd","padCharsStart","pullAllBy","pullAllWith","rangeStep","rangeStepRight","reduce","reduceRight","replace","set","slice","sortedIndexBy","sortedLastIndexBy","transform","unionBy","unionWith","update","xorBy","xorWith","zipWith"],4:["fill","setWith","updateWith"]},o.aryRearg={2:[1,0],3:[2,0,1],4:[3,2,0,1]},o.iterateeAry={dropRightWhile:1,dropWhile:1,every:1,filter:1,find:1,findFrom:1,findIndex:1,findIndexFrom:1,findKey:1,findLast:1,findLastFrom:1,findLastIndex:1,findLastIndexFrom:1,findLastKey:1,flatMap:1,flatMapDeep:1,flatMapDepth:1,forEach:1,forEachRight:1,forIn:1,forInRight:1,forOwn:1,forOwnRight:1,map:1,mapKeys:1,mapValues:1,partition:1,reduce:2,reduceRight:2,reject:1,remove:1,some:1,takeRightWhile:1,takeWhile:1,times:1,transform:2},o.iterateeRearg={mapKeys:[1],reduceRight:[1,0]},o.methodRearg={assignInAllWith:[1,0],assignInWith:[1,2,0],assignAllWith:[1,0],assignWith:[1,2,0],differenceBy:[1,2,0],differenceWith:[1,2,0],getOr:[2,1,0],intersectionBy:[1,2,0],intersectionWith:[1,2,0],isEqualWith:[1,2,0],isMatchWith:[2,1,0],mergeAllWith:[1,0],mergeWith:[1,2,0],padChars:[2,1,0],padCharsEnd:[2,1,0],padCharsStart:[2,1,0],pullAllBy:[2,1,0],pullAllWith:[2,1,0],rangeStep:[1,2,0],rangeStepRight:[1,2,0],setWith:[3,1,2,0],sortedIndexBy:[2,1,0],sortedLastIndexBy:[2,1,0],unionBy:[1,2,0],unionWith:[1,2,0],updateWith:[3,1,2,0],xorBy:[1,2,0],xorWith:[1,2,0],zipWith:[1,2,0]},o.methodSpread={assignAll:{start:0},assignAllWith:{start:0},assignInAll:{start:0},assignInAllWith:{start:0},defaultsAll:{start:0},defaultsDeepAll:{start:0},invokeArgs:{start:2},invokeArgsMap:{start:2},mergeAll:{start:0},mergeAllWith:{start:0},partial:{start:1},partialRight:{start:1},without:{start:1},zipAll:{start:0}},o.mutate={array:{fill:!0,pull:!0,pullAll:!0,pullAllBy:!0,pullAllWith:!0,pullAt:!0,remove:!0,reverse:!0},object:{assign:!0,assignAll:!0,assignAllWith:!0,assignIn:!0,assignInAll:!0,assignInAllWith:!0,assignInWith:!0,assignWith:!0,defaults:!0,defaultsAll:!0,defaultsDeep:!0,defaultsDeepAll:!0,merge:!0,mergeAll:!0,mergeAllWith:!0,mergeWith:!0},set:{set:!0,setWith:!0,unset:!0,update:!0,updateWith:!0}},o.realToAlias=function(){var s=Object.prototype.hasOwnProperty,i=o.aliasToReal,a={};for(var u in i){var _=i[u];s.call(a,_)?a[_].push(u):a[_]=[u]}return 
a}(),o.remap={assignAll:"assign",assignAllWith:"assignWith",assignInAll:"assignIn",assignInAllWith:"assignInWith",curryN:"curry",curryRightN:"curryRight",defaultsAll:"defaults",defaultsDeepAll:"defaultsDeep",findFrom:"find",findIndexFrom:"findIndex",findLastFrom:"findLast",findLastIndexFrom:"findLastIndex",getOr:"get",includesFrom:"includes",indexOfFrom:"indexOf",invokeArgs:"invoke",invokeArgsMap:"invokeMap",lastIndexOfFrom:"lastIndexOf",mergeAll:"merge",mergeAllWith:"mergeWith",padChars:"pad",padCharsEnd:"padEnd",padCharsStart:"padStart",propertyOf:"get",rangeStep:"range",rangeStepRight:"rangeRight",restFrom:"rest",spreadFrom:"spread",trimChars:"trim",trimCharsEnd:"trimEnd",trimCharsStart:"trimStart",zipAll:"zip"},o.skipFixed={castArray:!0,flow:!0,flowRight:!0,iteratee:!0,mixin:!0,rearg:!0,runInContext:!0},o.skipRearg={add:!0,assign:!0,assignIn:!0,bind:!0,bindKey:!0,concat:!0,difference:!0,divide:!0,eq:!0,gt:!0,gte:!0,isEqual:!0,lt:!0,lte:!0,matchesProperty:!0,merge:!0,multiply:!0,overArgs:!0,partial:!0,partialRight:!0,propertyOf:!0,random:!0,range:!0,rangeRight:!0,subtract:!0,zip:!0,zipObject:!0,zipObjectDeep:!0}},17255:(s,o,i)=>{var a=i(47422);s.exports=function basePropertyDeep(s){return function(o){return a(o,s)}}},17285:s=>{function source(s){return s?"string"==typeof s?s:s.source:null}function lookahead(s){return concat("(?=",s,")")}function concat(...s){return s.map((s=>source(s))).join("")}function either(...s){return"("+s.map((s=>source(s))).join("|")+")"}s.exports=function xml(s){const o=concat(/[A-Z_]/,function optional(s){return concat("(",s,")?")}(/[A-Z0-9_.-]*:/),/[A-Z0-9_.-]*/),i={className:"symbol",begin:/&[a-z]+;|&#[0-9]+;|&#x[a-f0-9]+;/},a={begin:/\s/,contains:[{className:"meta-keyword",begin:/#?[a-z_][a-z1-9_-]+/,illegal:/\n/}]},u=s.inherit(a,{begin:/\(/,end:/\)/}),_=s.inherit(s.APOS_STRING_MODE,{className:"meta-string"}),w=s.inherit(s.QUOTE_STRING_MODE,{className:"meta-string"}),x={endsWithParent:!0,illegal:/`]+/}]}]}]};return{name:"HTML, XML",aliases:["html","xhtml","rss","atom","xjb","xsd","xsl","plist","wsf","svg"],case_insensitive:!0,contains:[{className:"meta",begin://,relevance:10,contains:[a,w,_,u,{begin:/\[/,end:/\]/,contains:[{className:"meta",begin://,contains:[a,u,w,_]}]}]},s.COMMENT(//,{relevance:10}),{begin://,relevance:10},i,{className:"meta",begin:/<\?xml/,end:/\?>/,relevance:10},{className:"tag",begin:/)/,end:/>/,keywords:{name:"style"},contains:[x],starts:{end:/<\/style>/,returnEnd:!0,subLanguage:["css","xml"]}},{className:"tag",begin:/)/,end:/>/,keywords:{name:"script"},contains:[x],starts:{end:/<\/script>/,returnEnd:!0,subLanguage:["javascript","handlebars","xml"]}},{className:"tag",begin:/<>|<\/>/},{className:"tag",begin:concat(//,/>/,/\s/)))),end:/\/?>/,contains:[{className:"name",begin:o,relevance:0,starts:x}]},{className:"tag",begin:concat(/<\//,lookahead(concat(o,/>/))),contains:[{className:"name",begin:o,relevance:0},{begin:/>/,relevance:0,endsParent:!0}]}]}}},17400:(s,o,i)=>{var a=i(99374),u=1/0;s.exports=function toFinite(s){return s?(s=a(s))===u||s===-1/0?17976931348623157e292*(s<0?-1:1):s==s?s:0:0===s?s:0}},17533:s=>{s.exports=function yaml(s){var o="true false yes no 
null",i="[\\w#;/?:@&=+$,.~*'()[\\]]+",a={className:"string",relevance:0,variants:[{begin:/'/,end:/'/},{begin:/"/,end:/"/},{begin:/\S+/}],contains:[s.BACKSLASH_ESCAPE,{className:"template-variable",variants:[{begin:/\{\{/,end:/\}\}/},{begin:/%\{/,end:/\}/}]}]},u=s.inherit(a,{variants:[{begin:/'/,end:/'/},{begin:/"/,end:/"/},{begin:/[^\s,{}[\]]+/}]}),_={className:"number",begin:"\\b[0-9]{4}(-[0-9][0-9]){0,2}([Tt \\t][0-9][0-9]?(:[0-9][0-9]){2})?(\\.[0-9]*)?([ \\t])*(Z|[-+][0-9][0-9]?(:[0-9][0-9])?)?\\b"},w={end:",",endsWithParent:!0,excludeEnd:!0,keywords:o,relevance:0},x={begin:/\{/,end:/\}/,contains:[w],illegal:"\\n",relevance:0},C={begin:"\\[",end:"\\]",contains:[w],illegal:"\\n",relevance:0},j=[{className:"attr",variants:[{begin:"\\w[\\w :\\/.-]*:(?=[ \t]|$)"},{begin:'"\\w[\\w :\\/.-]*":(?=[ \t]|$)'},{begin:"'\\w[\\w :\\/.-]*':(?=[ \t]|$)"}]},{className:"meta",begin:"^---\\s*$",relevance:10},{className:"string",begin:"[\\|>]([1-9]?[+-])?[ ]*\\n( +)[^ ][^\\n]*\\n(\\2[^\\n]+\\n?)*"},{begin:"<%[%=-]?",end:"[%-]?%>",subLanguage:"ruby",excludeBegin:!0,excludeEnd:!0,relevance:0},{className:"type",begin:"!\\w+!"+i},{className:"type",begin:"!<"+i+">"},{className:"type",begin:"!"+i},{className:"type",begin:"!!"+i},{className:"meta",begin:"&"+s.UNDERSCORE_IDENT_RE+"$"},{className:"meta",begin:"\\*"+s.UNDERSCORE_IDENT_RE+"$"},{className:"bullet",begin:"-(?=[ ]|$)",relevance:0},s.HASH_COMMENT_MODE,{beginKeywords:o,keywords:{literal:o}},_,{className:"number",begin:s.C_NUMBER_RE+"\\b",relevance:0},x,C,a],L=[...j];return L.pop(),L.push(u),w.contains=L,{name:"YAML",case_insensitive:!0,aliases:["yml"],contains:j}}},17670:(s,o,i)=>{var a=i(12651);s.exports=function mapCacheDelete(s){var o=a(this,s).delete(s);return this.size-=o?1:0,o}},17965:(s,o,i)=>{"use strict";var a=i(16426),u={"text/plain":"Text","text/html":"Url",default:"Text"};s.exports=function copy(s,o){var i,_,w,x,C,j,L=!1;o||(o={}),i=o.debug||!1;try{if(w=a(),x=document.createRange(),C=document.getSelection(),(j=document.createElement("span")).textContent=s,j.ariaHidden="true",j.style.all="unset",j.style.position="fixed",j.style.top=0,j.style.clip="rect(0, 0, 0, 0)",j.style.whiteSpace="pre",j.style.webkitUserSelect="text",j.style.MozUserSelect="text",j.style.msUserSelect="text",j.style.userSelect="text",j.addEventListener("copy",(function(a){if(a.stopPropagation(),o.format)if(a.preventDefault(),void 0===a.clipboardData){i&&console.warn("unable to use e.clipboardData"),i&&console.warn("trying IE specific stuff"),window.clipboardData.clearData();var _=u[o.format]||u.default;window.clipboardData.setData(_,s)}else a.clipboardData.clearData(),a.clipboardData.setData(o.format,s);o.onCopy&&(a.preventDefault(),o.onCopy(a.clipboardData))})),document.body.appendChild(j),x.selectNodeContents(j),C.addRange(x),!document.execCommand("copy"))throw new Error("copy command was unsuccessful");L=!0}catch(a){i&&console.error("unable to copy using execCommand: ",a),i&&console.warn("trying IE specific stuff");try{window.clipboardData.setData(o.format||"text",s),o.onCopy&&o.onCopy(window.clipboardData),L=!0}catch(a){i&&console.error("unable to copy using clipboardData: ",a),i&&console.error("falling back to prompt"),_=function format(s){var o=(/mac os x/i.test(navigator.userAgent)?"⌘":"Ctrl")+"+C";return s.replace(/#{\s*key\s*}/g,o)}("message"in o?o.message:"Copy to clipboard: #{key}, Enter"),window.prompt(_,s)}}finally{C&&("function"==typeof C.removeRange?C.removeRange(x):C.removeAllRanges()),j&&document.body.removeChild(j),w()}return L}},18073:(s,o,i)=>{var 
a=i(85087),u=i(54641),_=i(70981);s.exports=function createRecurry(s,o,i,w,x,C,j,L,B,$){var V=8&o;o|=V?32:64,4&(o&=~(V?64:32))||(o&=-4);var U=[s,o,x,V?C:void 0,V?j:void 0,V?void 0:C,V?void 0:j,L,B,$],z=i.apply(void 0,U);return a(s)&&u(z,U),z.placeholder=w,_(z,s,o)}},19123:(s,o,i)=>{var a=i(65606),u=i(31499),_=i(88310).Stream;function resolve(s,o,i){var a,_=function create_indent(s,o){return new Array(o||0).join(s||"")}(o,i=i||0),w=s;if("object"==typeof s&&((w=s[a=Object.keys(s)[0]])&&w._elem))return w._elem.name=a,w._elem.icount=i,w._elem.indent=o,w._elem.indents=_,w._elem.interrupt=w,w._elem;var x,C=[],j=[];function get_attributes(s){Object.keys(s).forEach((function(o){C.push(function attribute(s,o){return s+'="'+u(o)+'"'}(o,s[o]))}))}switch(typeof w){case"object":if(null===w)break;w._attr&&get_attributes(w._attr),w._cdata&&j.push(("<![CDATA["+w._cdata).replace(/\]\]>/g,"]]]]><![CDATA[>")+"]]>"),w.forEach&&(x=!1,j.push(""),w.forEach((function(s){"object"==typeof s?"_attr"==Object.keys(s)[0]?get_attributes(s._attr):j.push(resolve(s,o,i+1)):(j.pop(),x=!0,j.push(u(s)))})),x||j.push(""));break;default:j.push(u(w))}return{name:a,interrupt:!1,attributes:C,content:j,icount:i,indents:_,indent:o}}function format(s,o,i){if("object"!=typeof o)return s(!1,o);var a=o.interrupt?1:o.content.length;function proceed(){for(;o.content.length;){var u=o.content.shift();if(void 0!==u){if(interrupt(u))return;format(s,u)}}s(!1,(a>1?o.indents:"")+(o.name?"</"+o.name+">":"")+(o.indent&&!i?"\n":"")),i&&i()}function interrupt(o){return!!o.interrupt&&(o.interrupt.append=s,o.interrupt.end=proceed,o.interrupt=!1,s(!0),!0)}if(s(!1,o.indents+(o.name?"<"+o.name:"")+(o.attributes.length?" "+o.attributes.join(" "):"")+(a?o.name?">":"":o.name?"/>":"")+(o.indent&&a>1?"\n":"")),!a)return s(!1,o.indent?"\n":"");interrupt(o)||proceed()}s.exports=function xml(s,o){"object"!=typeof o&&(o={indent:o});var i=o.stream?new _:null,u="",w=!1,x=o.indent?!0===o.indent?" 
":o.indent:"",C=!0;function delay(s){C?a.nextTick(s):s()}function append(s,o){if(void 0!==o&&(u+=o),s&&!w&&(i=i||new _,w=!0),s&&w){var a=u;delay((function(){i.emit("data",a)})),u=""}}function add(s,o){format(append,resolve(s,x,x?1:0),o)}function end(){if(i){var s=u;delay((function(){i.emit("data",s),i.emit("end"),i.readable=!1,i.emit("close")}))}}return delay((function(){C=!1})),o.declaration&&function addXmlDeclaration(s){var o={version:"1.0",encoding:s.encoding||"UTF-8"};s.standalone&&(o.standalone=s.standalone),add({"?xml":{_attr:o}}),u=u.replace("/>","?>")}(o.declaration),s&&s.forEach?s.forEach((function(o,i){var a;i+1===s.length&&(a=end),add(o,a)})):add(s,end),i?(i.readable=!0,i):u},s.exports.element=s.exports.Element=function element(){var s={_elem:resolve(Array.prototype.slice.call(arguments)),push:function(s){if(!this.append)throw new Error("not assigned to a parent!");var o=this,i=this._elem.indent;format(this.append,resolve(s,i,this._elem.icount+(i?1:0)),(function(){o.append(!0)}))},close:function(s){void 0!==s&&this.push(s),this.end&&this.end()}};return s}},19219:s=>{s.exports=function cacheHas(s,o){return s.has(o)}},19287:s=>{"use strict";s.exports={CSSRuleList:0,CSSStyleDeclaration:0,CSSValueList:0,ClientRectList:0,DOMRectList:0,DOMStringList:0,DOMTokenList:1,DataTransferItemList:0,FileList:0,HTMLAllCollection:0,HTMLCollection:0,HTMLFormElement:0,HTMLSelectElement:0,MediaList:0,MimeTypeArray:0,NamedNodeMap:0,NodeList:1,PaintRequestList:0,Plugin:0,PluginArray:0,SVGLengthList:0,SVGNumberList:0,SVGPathSegList:0,SVGPointList:0,SVGStringList:0,SVGTransformList:0,SourceBufferList:0,StyleSheetList:0,TextTrackCueList:0,TextTrackList:0,TouchList:0}},19358:(s,o,i)=>{"use strict";var a=i(85582),u=i(49724),_=i(61626),w=i(88280),x=i(79192),C=i(19595),j=i(54829),L=i(34084),B=i(32096),$=i(39259),V=i(85884),U=i(39447),z=i(7376);s.exports=function(s,o,i,Y){var Z="stackTraceLimit",ee=Y?2:1,ie=s.split("."),ae=ie[ie.length-1],ce=a.apply(null,ie);if(ce){var le=ce.prototype;if(!z&&u(le,"cause")&&delete le.cause,!i)return ce;var pe=a("Error"),de=o((function(s,o){var i=B(Y?o:s,void 0),a=Y?new ce(s):new ce;return void 0!==i&&_(a,"message",i),V(a,de,a.stack,2),this&&w(le,this)&&L(a,this,de),arguments.length>ee&&$(a,arguments[ee]),a}));if(de.prototype=le,"Error"!==ae?x?x(de,pe):C(de,pe,{name:!0}):U&&Z in ce&&(j(de,ce,Z),j(de,ce,"prepareStackTrace")),C(de,ce),!z)try{le.name!==ae&&_(le,"name",ae),le.constructor=de}catch(s){}return de}}},19570:(s,o,i)=>{var a=i(37334),u=i(93243),_=i(83488),w=u?function(s,o){return u(s,"toString",{configurable:!0,enumerable:!1,value:a(o),writable:!0})}:_;s.exports=w},19595:(s,o,i)=>{"use strict";var a=i(49724),u=i(11042),_=i(13846),w=i(74284);s.exports=function(s,o,i){for(var x=u(o),C=w.f,j=_.f,L=0;L{"use strict";var a=i(23034);s.exports=a},19846:(s,o,i)=>{"use strict";var a=i(20798),u=i(98828),_=i(45951).String;s.exports=!!Object.getOwnPropertySymbols&&!u((function(){var s=Symbol("symbol detection");return!_(s)||!(Object(s)instanceof Symbol)||!Symbol.sham&&a&&a<41}))},19931:(s,o,i)=>{var a=i(31769),u=i(68090),_=i(68969),w=i(77797);s.exports=function baseUnset(s,o){return o=a(o,s),null==(s=_(s,o))||delete s[w(u(o))]}},20181:(s,o,i)=>{var a=/^\s+|\s+$/g,u=/^[-+]0x[0-9a-f]+$/i,_=/^0b[01]+$/i,w=/^0o[0-7]+$/i,x=parseInt,C="object"==typeof i.g&&i.g&&i.g.Object===Object&&i.g,j="object"==typeof self&&self&&self.Object===Object&&self,L=C||j||Function("return this")(),B=Object.prototype.toString,$=Math.max,V=Math.min,now=function(){return L.Date.now()};function isObject(s){var 
o=typeof s;return!!s&&("object"==o||"function"==o)}function toNumber(s){if("number"==typeof s)return s;if(function isSymbol(s){return"symbol"==typeof s||function isObjectLike(s){return!!s&&"object"==typeof s}(s)&&"[object Symbol]"==B.call(s)}(s))return NaN;if(isObject(s)){var o="function"==typeof s.valueOf?s.valueOf():s;s=isObject(o)?o+"":o}if("string"!=typeof s)return 0===s?s:+s;s=s.replace(a,"");var i=_.test(s);return i||w.test(s)?x(s.slice(2),i?2:8):u.test(s)?NaN:+s}s.exports=function debounce(s,o,i){var a,u,_,w,x,C,j=0,L=!1,B=!1,U=!0;if("function"!=typeof s)throw new TypeError("Expected a function");function invokeFunc(o){var i=a,_=u;return a=u=void 0,j=o,w=s.apply(_,i)}function shouldInvoke(s){var i=s-C;return void 0===C||i>=o||i<0||B&&s-j>=_}function timerExpired(){var s=now();if(shouldInvoke(s))return trailingEdge(s);x=setTimeout(timerExpired,function remainingWait(s){var i=o-(s-C);return B?V(i,_-(s-j)):i}(s))}function trailingEdge(s){return x=void 0,U&&a?invokeFunc(s):(a=u=void 0,w)}function debounced(){var s=now(),i=shouldInvoke(s);if(a=arguments,u=this,C=s,i){if(void 0===x)return function leadingEdge(s){return j=s,x=setTimeout(timerExpired,o),L?invokeFunc(s):w}(C);if(B)return x=setTimeout(timerExpired,o),invokeFunc(C)}return void 0===x&&(x=setTimeout(timerExpired,o)),w}return o=toNumber(o)||0,isObject(i)&&(L=!!i.leading,_=(B="maxWait"in i)?$(toNumber(i.maxWait)||0,o):_,U="trailing"in i?!!i.trailing:U),debounced.cancel=function cancel(){void 0!==x&&clearTimeout(x),j=0,a=C=u=x=void 0},debounced.flush=function flush(){return void 0===x?w:trailingEdge(now())},debounced}},20317:s=>{s.exports=function mapToArray(s){var o=-1,i=Array(s.size);return s.forEach((function(s,a){i[++o]=[a,s]})),i}},20334:(s,o,i)=>{"use strict";var a=i(48287).Buffer;class NonError extends Error{constructor(s){super(NonError._prepareSuperMessage(s)),Object.defineProperty(this,"name",{value:"NonError",configurable:!0,writable:!0}),Error.captureStackTrace&&Error.captureStackTrace(this,NonError)}static _prepareSuperMessage(s){try{return JSON.stringify(s)}catch{return String(s)}}}const u=[{property:"name",enumerable:!1},{property:"message",enumerable:!1},{property:"stack",enumerable:!1},{property:"code",enumerable:!0}],_=Symbol(".toJSON called"),destroyCircular=({from:s,seen:o,to_:i,forceEnumerable:w,maxDepth:x,depth:C})=>{const j=i||(Array.isArray(s)?[]:{});if(o.push(s),C>=x)return j;if("function"==typeof s.toJSON&&!0!==s[_])return(s=>{s[_]=!0;const o=s.toJSON();return delete s[_],o})(s);for(const[i,u]of Object.entries(s))"function"==typeof a&&a.isBuffer(u)?j[i]="[object Buffer]":"function"!=typeof u&&(u&&"object"==typeof u?o.includes(s[i])?j[i]="[Circular]":(C++,j[i]=destroyCircular({from:s[i],seen:o.slice(),forceEnumerable:w,maxDepth:x,depth:C})):j[i]=u);for(const{property:o,enumerable:i}of u)"string"==typeof s[o]&&Object.defineProperty(j,o,{value:s[o],enumerable:!!w||i,configurable:!0,writable:!0});return j};s.exports={serializeError:(s,o={})=>{const{maxDepth:i=Number.POSITIVE_INFINITY}=o;return"object"==typeof s&&null!==s?destroyCircular({from:s,seen:[],forceEnumerable:!0,maxDepth:i,depth:0}):"function"==typeof s?`[Function: ${s.name||"anonymous"}]`:s},deserializeError:(s,o={})=>{const{maxDepth:i=Number.POSITIVE_INFINITY}=o;if(s instanceof Error)return s;if("object"==typeof s&&null!==s&&!Array.isArray(s)){const o=new Error;return destroyCircular({from:s,seen:[],to_:o,maxDepth:i,depth:0}),o}return new NonError(s)}}},20426:s=>{var o=Object.prototype.hasOwnProperty;s.exports=function baseHas(s,i){return 
null!=s&&o.call(s,i)}},20575:(s,o,i)=>{"use strict";var a=i(3121);s.exports=function(s){return a(s.length)}},20798:(s,o,i)=>{"use strict";var a,u,_=i(45951),w=i(96794),x=_.process,C=_.Deno,j=x&&x.versions||C&&C.version,L=j&&j.v8;L&&(u=(a=L.split("."))[0]>0&&a[0]<4?1:+(a[0]+a[1])),!u&&w&&(!(a=w.match(/Edge\/(\d+)/))||a[1]>=74)&&(a=w.match(/Chrome\/(\d+)/))&&(u=+a[1]),s.exports=u},20850:(s,o,i)=>{"use strict";s.exports=i(46076)},20999:(s,o,i)=>{var a=i(69302),u=i(36800);s.exports=function createAssigner(s){return a((function(o,i){var a=-1,_=i.length,w=_>1?i[_-1]:void 0,x=_>2?i[2]:void 0;for(w=s.length>3&&"function"==typeof w?(_--,w):void 0,x&&u(i[0],i[1],x)&&(w=_<3?void 0:w,_=1),o=Object(o);++a<_;){var C=i[a];C&&s(o,C,a,w)}return o}))}},21549:(s,o,i)=>{var a=i(22032),u=i(63862),_=i(66721),w=i(12749),x=i(35749);function Hash(s){var o=-1,i=null==s?0:s.length;for(this.clear();++o{var a=i(16547),u=i(43360);s.exports=function copyObject(s,o,i,_){var w=!i;i||(i={});for(var x=-1,C=o.length;++x{var a=i(51873),u=i(37828),_=i(75288),w=i(25911),x=i(20317),C=i(84247),j=a?a.prototype:void 0,L=j?j.valueOf:void 0;s.exports=function equalByTag(s,o,i,a,j,B,$){switch(i){case"[object DataView]":if(s.byteLength!=o.byteLength||s.byteOffset!=o.byteOffset)return!1;s=s.buffer,o=o.buffer;case"[object ArrayBuffer]":return!(s.byteLength!=o.byteLength||!B(new u(s),new u(o)));case"[object Boolean]":case"[object Date]":case"[object Number]":return _(+s,+o);case"[object Error]":return s.name==o.name&&s.message==o.message;case"[object RegExp]":case"[object String]":return s==o+"";case"[object Map]":var V=x;case"[object Set]":var U=1&a;if(V||(V=C),s.size!=o.size&&!U)return!1;var z=$.get(s);if(z)return z==o;a|=2,$.set(s,o);var Y=w(V(s),V(o),a,j,B,$);return $.delete(s),Y;case"[object Symbol]":if(L)return L.call(s)==L.call(o)}return!1}},22032:(s,o,i)=>{var a=i(81042);s.exports=function hashClear(){this.__data__=a?a(null):{},this.size=0}},22225:s=>{var o="\\ud800-\\udfff",i="\\u2700-\\u27bf",a="a-z\\xdf-\\xf6\\xf8-\\xff",u="A-Z\\xc0-\\xd6\\xd8-\\xde",_="\\xac\\xb1\\xd7\\xf7\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf\\u2000-\\u206f \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000",w="["+_+"]",x="\\d+",C="["+i+"]",j="["+a+"]",L="[^"+o+_+x+i+a+u+"]",B="(?:\\ud83c[\\udde6-\\uddff]){2}",$="[\\ud800-\\udbff][\\udc00-\\udfff]",V="["+u+"]",U="(?:"+j+"|"+L+")",z="(?:"+V+"|"+L+")",Y="(?:['’](?:d|ll|m|re|s|t|ve))?",Z="(?:['’](?:D|LL|M|RE|S|T|VE))?",ee="(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?",ie="[\\ufe0e\\ufe0f]?",ae=ie+ee+("(?:\\u200d(?:"+["[^"+o+"]",B,$].join("|")+")"+ie+ee+")*"),ce="(?:"+[C,B,$].join("|")+")"+ae,le=RegExp([V+"?"+j+"+"+Y+"(?="+[w,V,"$"].join("|")+")",z+"+"+Z+"(?="+[w,V+U,"$"].join("|")+")",V+"?"+U+"+"+Y,V+"+"+Z,"\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])","\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])",x,ce].join("|"),"g");s.exports=function unicodeWords(s){return s.match(le)||[]}},22551:(s,o,i)=>{"use strict";var a=i(96540),u=i(69982);function p(s){for(var o="https://reactjs.org/docs/error-decoder.html?invariant="+s,i=1;i