Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions converters/ontology/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Python
__pycache__/
*.py[cod]
*.pyo
*.pyd

# Virtual environments
venv/
.venv/
env/

# pyenv
.python-version

# Build / packaging
dist/
build/
*.egg-info/
*.egg
.eggs/

# Pytest
.pytest_cache/
.coverage
htmlcov/

# Mypy
.mypy_cache/

# Ruff
.ruff_cache/

# VS Code
.vscode/
*.code-workspace
.history/

# JetBrains (PyCharm, IntelliJ, etc.)
.idea/
*.iml
*.iws
*.ipr
93 changes: 93 additions & 0 deletions converters/ontology/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# OSI Ontology Converters

Converters between OSI, Palantir, and Spec ontology formats.

| Converter | Direction |
|-----------|-----------|
| `palantir_to_osi` | Palantir ontology → OSI model |
| `osi_to_spec` | OSI model → Spec YAML |
| `spec_to_osi` | Spec YAML → OSI model |

## Prerequisites

- [pyenv](https://github.com/pyenv/pyenv) — manages the Python version

Install pyenv if you don't have it:

```bash
brew install pyenv
```

Add to your shell profile (`~/.zshrc` or `~/.bashrc`) and restart the shell:

```bash
export PYENV_ROOT="$HOME/.pyenv"
export PATH="$PYENV_ROOT/bin:$PATH"
eval "$(pyenv init -)"
```

## Setup

```bash
pyenv install 3.11
pyenv local 3.11
pip install --upgrade pip
pip install virtualenv
python -m virtualenv venv
source ./venv/bin/activate
pip install -r requirements.lock
pip install -e ".[dev]"
```

## Generating / updating the lock file

`requirements.lock` is produced by [pip-tools](https://github.com/jazzband/pip-tools) from `pyproject.toml`.
Run this whenever you add or change a dependency:

```bash
pip-compile --output-file requirements.lock pyproject.toml
```

## Usage

The package is importable as `osi` after installation:

```python
from osi import OsiToSpecConverter, PalantirToOsiConverter, SpecToOsiConverter
```

## Scripts

### `scripts/palantir_to_osi.py`

Converts a Palantir ontology export (`.zip` file containing a Palantir ontology JSON and one or more dataset spec JSON files) into an OSI-compliant YAML representation, printed to stdout.

**Usage:**

```bash
python scripts/palantir_to_osi.py path/to/palantir_export.zip
```

Warnings are written to stderr; the OSI YAML is written to stdout.

**Environment variables (optional):**

| Variable | Default | Description |
|---------------------------|------------|----------------------------------------------------------|
| `SNOWFLAKE_DATABASE_NAME` | `PALANTIR` | Snowflake database name used to qualify table references |
| `SNOWFLAKE_SCHEMA_NAME` | `PALANTIR` | Snowflake schema name used to qualify table references |

If these variables are already set in your environment, they are picked up automatically. To override them for a single run:

```bash
SNOWFLAKE_DATABASE_NAME=MY_DB SNOWFLAKE_SCHEMA_NAME=MY_SCHEMA \
python scripts/palantir_to_osi.py path/to/palantir_export.zip
```

## Deactivating the environment

```bash
deactivate
```
29 changes: 29 additions & 0 deletions converters/ontology/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[project]
name = "ontology"
version = "0.1.0"
description = "OSI ontology converters — Palantir → OSI, OSI → Spec, Spec → OSI"
readme = "README.md"
authors = [
{ name = "RelationalAI", email = "support@relational.ai" },
]
requires-python = ">= 3.11"
dependencies = [
"pydantic",
"pyyaml",
]

[project.optional-dependencies]
dev = [
"pytest==9.0.3",
"pytest-snapshot",
"parameterized",
"pip-tools",
]

[tool.setuptools.packages.find]
where = ["src"]

[tool.pytest.ini_options]
testpaths = ["tests"]

[tool.pyright]
pythonVersion = "3.11"
21 changes: 21 additions & 0 deletions converters/ontology/requirements.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --output-file=requirements.lock pyproject.toml
#
annotated-types==0.7.0
# via pydantic
pydantic==2.13.4
# via osi-ontology-converters (pyproject.toml)
pydantic-core==2.46.4
# via pydantic
pyyaml==6.0.3
# via osi-ontology-converters (pyproject.toml)
typing-extensions==4.15.0
# via
# pydantic
# pydantic-core
# typing-inspection
typing-inspection==0.4.2
# via pydantic
55 changes: 55 additions & 0 deletions converters/ontology/scripts/palantir_to_osi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Description:
#
# This script converts a zip file that contains:
# 1. A Palantir ontology (JSON file) and
# 2. A folder containing one or more Palantir dataset specs (JSON files)
# into an OSI compliant YAML representation of that ontology, using environment
# variables to configure the Snowflake database and schema names.
#
# Usage:
#
# $ python palantir_to_osi.py <path_to_zip_file>
#
# Environment variables used:
#
# - SNOWFLAKE_DATABASE_NAME
# - SNOWFLAKE_SCHEMA_NAME
#
# The tables that populate the ontology are named
# "{SNOWFLAKE_DATABASE_NAME}.{SNOWFLAKE_SCHEMA_NAME}.{TABLE_NAME}"
# where TABLE_NAME is the name of a data set that is referenced in
# the Palantir ontology.
#
# Outputs:
#
# - stdout: The OSI YAML representation of the ontology
# - stderr: Warnings
#
import os
import sys
from pathlib import Path

from osi.converter.palantir_to_osi.converter import PalantirToOsiConverter
from osi.converter.osi_to_spec.converter import OsiToSpecConverter

from osi.external.palantir.parser import PalantirParser

if __name__ == "__main__":
    # Database and schema used to qualify table references; both fall back
    # to "PALANTIR" when the environment variable is not set.
    snowflake_db = os.environ.get("SNOWFLAKE_DATABASE_NAME", "PALANTIR")
    snowflake_schema = os.environ.get("SNOWFLAKE_SCHEMA_NAME", "PALANTIR")

    # Exactly one positional argument is accepted: the path to the sources.
    if len(sys.argv) != 2:
        raise Exception(f"++ Usage: {sys.argv[0]} path to Palantir sources")

    source_path = Path(sys.argv[1])
    palantir_parser = PalantirParser()

    # A ".zip" path (case-insensitive) is opened as bytes; any other path is
    # opened in text mode.
    is_zip = source_path.suffix.lower() == ".zip"
    with open(source_path, "rb" if is_zip else "r") as source:
        palantir_parser.parse(source)

    # Palantir model -> OSI ontology model -> OSI spec, dumped as YAML on stdout.
    osi_model = PalantirToOsiConverter.convert(
        palantir_parser.model(), snowflake_db, snowflake_schema
    )
    print(OsiToSpecConverter.convert(osi_model).dump_yaml())

72 changes: 72 additions & 0 deletions converters/ontology/src/osi/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
Public API surface for osi.

Consumers should import from here rather than from deep sub-paths.
"""

from osi.model import (
Concept,
ConceptMapping,
ConceptType,
CustomExtension,
Dataset,
DatasetField,
DialectExpression,
DialectExpressionSet,
Formula,
JoinPath,
LinkMapping,
Metric,
ObjectMapping,
OntologyComponent,
OntologyMapping,
OsiOntology,
ReferentMapping,
Relationship,
RelationshipMultiplicity,
Role,
SemanticModel,
)
from osi.spec import OsiSpec
from osi.parser import OsiParser
from osi.external.palantir.parser import PalantirParser
from osi.converter.spec_to_osi.converter import SpecToOsiConverter
from osi.converter.osi_to_spec.converter import OsiToSpecConverter
from osi.converter.palantir_to_osi.converter import PalantirToOsiConverter

# Explicit public API of the package: these are the names exposed by
# `from osi import *`. Every entry is imported at the top of this module.
__all__ = [
# Model — ontology layer
"Concept",
"ConceptType",
"Relationship",
"RelationshipMultiplicity",
"Role",
"Formula",
# Model — semantic layer
"Dataset",
"DatasetField",
"DialectExpression",
"DialectExpressionSet",
"JoinPath",
"Metric",
"SemanticModel",
# Model — mapping layer
"ObjectMapping",
"ReferentMapping",
"LinkMapping",
"ConceptMapping",
"OntologyMapping",
"OntologyComponent",
"OsiOntology",
# Supporting types
"CustomExtension",
# Spec DTO
"OsiSpec",
# Parsers
"OsiParser",
"PalantirParser",
# Converters
"SpecToOsiConverter",
"OsiToSpecConverter",
"PalantirToOsiConverter",
]
Empty file.
42 changes: 42 additions & 0 deletions converters/ontology/src/osi/common/file_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import io
import zipfile
from typing import Iterable


def iter_json_files_from_dir_in_zip(zf: zipfile.ZipFile, dir_prefix: str) -> Iterable[tuple[str, io.IOBase]]:
    """Yield ``(member_name, stream)`` for each JSON file under a zip directory.

    A member is selected when its name ends with ``.json`` (compared
    case-insensitively) and starts with ``dir_prefix + "/"``. When every
    slash-containing member shares a single first path component, names
    starting with ``<that component>/<dir_prefix>/`` are accepted as well.
    Matching is by name prefix, so files in nested sub-directories are
    included. Members are yielded in ``zf.namelist()`` order, each name at
    most once.

    Args:
        zf: An open zip archive to read from.
        dir_prefix: Directory path inside the archive; trailing slashes are
            ignored.

    Yields:
        Tuples of the member name and an ``io.BytesIO`` holding its full
        contents.
    """
    members = zf.namelist()
    normalized = dir_prefix.rstrip("/") + "/"

    accepted = [normalized]
    top_levels = {member.split("/", 1)[0] for member in members if "/" in member}
    if len(top_levels) == 1:
        # Everything sits under one folder: also look one level down.
        (wrapper,) = top_levels
        accepted.append(f"{wrapper}/{normalized}")
    prefixes = tuple(accepted)

    emitted = set()
    for member in members:
        is_json = member.lower().endswith(".json")
        if not is_json or member.endswith("/"):
            continue
        if not member.startswith(prefixes) or member in emitted:
            continue
        emitted.add(member)
        with zf.open(member, "r") as handle:
            yield member, io.BytesIO(handle.read())

def open_top_level_file_from_zip(zf: zipfile.ZipFile, filename: str) -> io.IOBase:
    """Return the contents of a top-level archive file as an in-memory stream.

    ``filename`` is looked up at the archive root first. If it is absent and
    every slash-containing member shares a single first path component, the
    file is looked up directly inside that folder instead.

    Args:
        zf: An open zip archive to read from.
        filename: Name of the file expected at the top level.

    Returns:
        An ``io.BytesIO`` holding the full contents of the located member.

    Raises:
        FileNotFoundError: If the file is found in neither location.
    """
    members = set(zf.namelist())

    # Candidate member names, in lookup order.
    lookup = [filename]
    folders = {member.split("/", 1)[0] for member in members if "/" in member}
    if len(folders) == 1:
        (wrapper,) = folders
        lookup.append(f"{wrapper}/{filename}")

    for target in lookup:
        if target in members:
            with zf.open(target, "r") as handle:
                return io.BytesIO(handle.read())

    raise FileNotFoundError(f"Missing required top-level file: {filename}")
Loading