Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,18 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies

# Install uv
- name: Install uv
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m poetry config virtualenvs.in-project true
python -m poetry install
curl -LsSf https://astral.sh/uv/install.sh | sh

# Run tests
- name: Test with pytest
run: |
python -m poetry run python -m pytest -sxv
uv run pytest -sxv

# Run lint
- name: Lint with ruff
run: |
uvx ruff check .
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.9
21 changes: 3 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,20 +1,5 @@
env:
rm -Rf env
python3 -m venv env
env/bin/pip install -U pip setuptools
env/bin/pip install poetry
source env/bin/activate && python -m poetry install
uv sync

black:
python -m black --line-length 88 tableau_sql_parser/
python -m black --line-length 88 tests/

isort:
isort tableau_sql_parser/
isort tests/

lint:
flake8 --max-line-length 88 tableau_sql_parser/
flake8 --max-line-length 88 tests/

format: black isort lint
format:
uvx ruff check . --fix
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
# tableau-sql-parser
SQL parser for Tableau custom SQL requests

## Usage
## What is it?

Given a .twb or a .twbx Tableau archive, you can extract these information:
Given a .twb or .twbx Tableau archive, you can extract the following information as a report:
- number of queries analyzed
- tables used
- a tree-like structure describing `schema.tables.column`

The CLI command to use is `sqlparse`:
## How to use it?

`sqlparse -f file/to/parse.twb(x) -r custom_report_name -o`
**Requirement**: install [uv](https://github.com/astral-sh/uv) globally (for example, with Homebrew).

Type `sqlparse --help` for more details.
Then run `uv sync`
Then download a Tableau archive (.twbx or .twb)
Then run `uv run sqlparse -f file/to/parse/archive.twb(x) -r custom_report_name -o` to extract the report

Type `uv run sqlparse --help` for more details.
700 changes: 0 additions & 700 deletions poetry.lock

This file was deleted.

48 changes: 28 additions & 20 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,26 +1,34 @@
[tool.poetry]
[project]
name = "tableau-sql-parser"
version = "0.1.1"
version = "0.1.2"
description = "A tool for parsing Tableau custom SQL queries and extracting useful information"
authors = ["florian-drouet <50357200+florian-drouet@users.noreply.github.com>"]
license = "LICENSE"
authors = [
{ name = "Florian Drouet", email = "florian.drouet@gmail.com" }
]
license = { file = "LICENSE" }
readme = "README.md"
packages = [{include = "tableau_sql_parser"}]
requires-python = ">=3.9,<3.11"
dependencies = [
"lxml>=5.4.0",
"sqlfluff>=3.4.0",
]

[tool.poetry.scripts]
sqlparse = "tableau_sql_parser.cli:main"
[tool.uv]
package = true

[tool.poetry.dependencies]
python = "^3.9"
lxml = "4.9"
sqlfluff = "2.1"
[tool.ruff]
target-version = "py39"
line-length = 88

[tool.poetry.group.dev.dependencies]
black = "23.7"
flake8 = "6.0"
isort = "5.12"
pytest = "7.4"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
lint.select = [
"E", "F", "W", # Basic errors and warnings
"A", # Shadowing builtins
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"I", # isort
"N", # pep8-naming
"ANN", # Type annotations
"UP", # pyupgrade
"SIM", # Simplify
"PLC", "PLE", "PLW" # pylint
]
2 changes: 1 addition & 1 deletion tableau_sql_parser/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
is_flag=True,
required=False,
)
def main(file_to_parse, report_name, is_output):
def main(file_to_parse: str, report_name: str, is_output: bool) -> None:
click.echo(f"File name is: {file_to_parse} and report name is: {report_name}")
my_workbook = TableauWorkbook(filename=file_to_parse, report_name=report_name)
tables_names, column_names, number_queries = my_workbook._generate_output()
Expand Down
8 changes: 4 additions & 4 deletions tableau_sql_parser/output_formatting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class OutputFormatting:
def __init__(self, report_name: str, alias: dict, columns: dict):
def __init__(self, report_name: str, alias: dict, columns: dict) -> None:
self.report_name = report_name
self.tables_names = []
self.column_names = []
Expand All @@ -14,21 +14,21 @@ def get_column_names(alias: dict, columns: dict) -> list:
for value in columns.values():
split = value.split(".", 1)
potential_alias = split[0]
if potential_alias in alias.keys():
if potential_alias in alias:
column_names_full.append(f"{alias[potential_alias]}.{split[1]}")
else:
column_names_full.append(potential_alias)
return column_names_full

def get_column_names_all(self):
def get_column_names_all(self) -> None:
temp_column_names = []
for i in range(0, len(self.alias)):
temp_column_names.extend(
self.get_column_names(alias=self.alias[i], columns=self.columns[i])
)
self.column_names = sorted([*set(temp_column_names)])

def get_tables_names(self):
def get_tables_names(self) -> None:
temp_table_names = []
for column in self.column_names:
temp_table_names.append(".".join(column[::-1].split(".", 1)[1:])[::-1])
Expand Down
8 changes: 4 additions & 4 deletions tableau_sql_parser/recursive_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


class RecursiveSearch:
def __init__(self):
def __init__(self) -> None:
self.stock = {}
self.index = 0
self.columns = {}
Expand Down Expand Up @@ -33,19 +33,19 @@ def get_full_name_columns(alias: dict, columns: dict) -> list:
for value in columns.values():
split = value.split(".", 1)
potential_alias = split[0]
if potential_alias in alias.keys():
if potential_alias in alias:
full_names_columns.append(f"{alias[potential_alias]}.{split[1]}")
else:
full_names_columns.append(potential_alias)
return full_names_columns

def recursive_depth_list(self, file_to_parse: list):
def recursive_depth_list(self, file_to_parse: list) -> None:
for element in file_to_parse:
self.index += 1
if isinstance(element, dict):
self.recursive_depth(file_to_parse=element)

def recursive_depth(self, file_to_parse: dict):
def recursive_depth(self, file_to_parse: dict) -> None:
for key, values in file_to_parse.items():
self.index += 1
if key == "from_expression_element":
Expand Down
19 changes: 7 additions & 12 deletions tableau_sql_parser/tableau_workbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class TableauWorkbook:
Defines a workbook object from a filename.
"""

def __init__(self, filename, report_name):
def __init__(self, filename: str, report_name: str) -> None:
self.filename = os.path.normpath(filename)
self.report_name = report_name
self.xml = self._get_xml()
Expand All @@ -26,7 +26,7 @@ def __init__(self, filename, report_name):
self.alias,
) = self._recursive_search_sql()

def _get_xml(self):
def _get_xml(self) -> lxml.etree._Element:
"""
Returns the xml of the given .twb or .twbx file.
"""
Expand All @@ -51,24 +51,19 @@ def _get_xml(self):

return xml

def _get_custom_sql(self):
def _get_custom_sql(self) -> list:
"""
Returns a list of all unique custom sql queries in the workbook.
"""

search = self.xml.xpath("//relation[@type='text']")
queries = list(
set(
[
sql.text.lower().replace("<<", "<").replace(">>", ">")
for sql in search
]
)
{sql.text.lower().replace("<<", "<").replace(">>", ">") for sql in search}
)

return queries

def _parse_custom_sql(self):
def _parse_custom_sql(self) -> list:
"""
Returns a list of all unique custom sql queries
in the workbook parsed by sqlfluff parser
Expand All @@ -81,7 +76,7 @@ def _parse_custom_sql(self):
logging.error(e)
return parsed_queries

def _recursive_search_sql(self):
def _recursive_search_sql(self) -> tuple[list, list, list]:
"""
Returns a dict where keys are index and values are a list of column/table names
"""
Expand All @@ -96,7 +91,7 @@ def _recursive_search_sql(self):
alias.append(search.alias)
return recursive_searched_queries, columns, alias

def _generate_output(self):
def _generate_output(self) -> tuple[list, list, int]:
number_queries_analyzed = len(self.recursive_searched_queries)
report = OutputFormatting(
report_name=self.report_name,
Expand Down
19 changes: 5 additions & 14 deletions tableau_sql_parser/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
import os

import click


def is_valid_file(parser, arg):
if not os.path.exists(arg):
parser.error("The file %s does not exist!" % arg)
else:
return open(arg, "r") # return an open file handle


def tree_output(column_names: list) -> str:
increment = 0
tree = ""
Expand All @@ -26,11 +17,11 @@ def tree_output(column_names: list) -> str:

max_length = max(len(current_line), len(next_line))

for i in range(0, max_length - 1):
for ii in range(0, max_length - 1):
if (
current_line[i : i + 1] == next_line[i : i + 1]
and len(current_line[i : i + 1]) > 0
and len(next_line[i : i + 1]) > 0
current_line[ii : ii + 1] == next_line[ii : ii + 1]
and len(current_line[ii : ii + 1]) > 0
and len(next_line[ii : ii + 1]) > 0
Comment thread
florian-drouet marked this conversation as resolved.
):
increment += 1
elif increment > 0:
Expand All @@ -43,7 +34,7 @@ def tree_output(column_names: list) -> str:

def generate_report(
tables_names: list, column_names: list, number_queries: int, report_name: str
):
) -> None:
joined_tables = " | ".join([table for table in tables_names if table != ""])
tree = tree_output(column_names=column_names)
with open(f"{report_name}.txt", "w") as f:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_recursive_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
([" foo", "bar ", "1"], "foo bar 1"),
],
)
def test__flatten_values(test_input, expected):
def test__flatten_values(test_input: list, expected: str) -> None:
assert RecursiveSearch._flatten_values(test_input) == expected


Expand All @@ -24,7 +24,7 @@ def test__flatten_values(test_input, expected):
({"foo": "bar"}, "bar"),
],
)
def test__extract_elements(test_input, expected):
def test__extract_elements(test_input: list, expected: str) -> None:
assert RecursiveSearch._extract_elements(test_input) == expected


Expand Down Expand Up @@ -60,7 +60,7 @@ def test__extract_elements(test_input, expected):
}


def test__recursive_search():
def test__recursive_search() -> None:
search_sql = RecursiveSearch()
search_sql.recursive_depth(file_to_parse=parsed_query)
assert search_sql.stock == expected_output
2 changes: 1 addition & 1 deletion tests/test_tree_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@
"""


def test__flatten_values():
def test__flatten_values() -> None:
assert tree_output(test_input) == expected
Loading