From d85963bff56dc068a929d7849ad72728bda5d6cc Mon Sep 17 00:00:00 2001 From: Jay Guo Date: Wed, 17 Dec 2025 15:46:20 -0500 Subject: [PATCH] fix: handle new and old format of dataset_query --- ckanext/in_app_reporting/tests/test_utils.py | 268 +++++++++++++++++++ ckanext/in_app_reporting/utils.py | 72 ++++- 2 files changed, 330 insertions(+), 10 deletions(-) diff --git a/ckanext/in_app_reporting/tests/test_utils.py b/ckanext/in_app_reporting/tests/test_utils.py index b7aff4c..2b3cf0a 100644 --- a/ckanext/in_app_reporting/tests/test_utils.py +++ b/ckanext/in_app_reporting/tests/test_utils.py @@ -116,6 +116,87 @@ def test_user_is_admin_or_editor_with_member_only(self): assert result is False +class TestExtractNativeSql: + """Test _extract_native_sql_from_dataset_query helper function""" + + def test_extract_native_sql_old_format(self): + """Test extraction from old format (native.query)""" + dataset_query = { + 'native': { + 'query': 'SELECT * FROM "test-resource-id"' + } + } + result = utils._extract_native_sql_from_dataset_query(dataset_query) + assert result == 'SELECT * FROM "test-resource-id"' + + def test_extract_native_sql_new_mbql_format(self): + """Test extraction from new MBQL format (stages[].native)""" + dataset_query = { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT * FROM "bee42093-2c03-49f3-b185-200e745ec892" WHERE "Year" < \'2026\'' + } + ] + } + result = utils._extract_native_sql_from_dataset_query(dataset_query) + assert result == 'SELECT * FROM "bee42093-2c03-49f3-b185-200e745ec892" WHERE "Year" < \'2026\'' + + def test_extract_native_sql_new_format_multiple_stages(self): + """Test extraction from new format with multiple stages (should use first native stage)""" + dataset_query = { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT * FROM "resource-123"' + }, + { + 'lib/type': 'mbql.stage/$limit', + 'limit': 100 + } + ] + } + result = utils._extract_native_sql_from_dataset_query(dataset_query) + assert result == 'SELECT * FROM "resource-123"' + + def test_extract_native_sql_empty_dataset_query(self): + """Test extraction with empty dataset_query""" + result = utils._extract_native_sql_from_dataset_query({}) + assert result == '' + + def test_extract_native_sql_none(self): + """Test extraction with None""" + result = utils._extract_native_sql_from_dataset_query(None) + assert result == '' + + def test_extract_native_sql_no_native_content(self): + """Test extraction when no native SQL is present""" + dataset_query = { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/$limit', + 'limit': 100 + } + ] + } + result = utils._extract_native_sql_from_dataset_query(dataset_query) + assert result == '' + + def test_extract_native_sql_old_format_native_as_string(self): + """Test extraction when native is a string (edge case)""" + dataset_query = { + 'native': 'SELECT * FROM "test"' + } + result = utils._extract_native_sql_from_dataset_query(dataset_query) + assert result == 'SELECT * FROM "test"' + + class TestMetabaseApiRequests: """Test Metabase API request functions""" @@ -485,6 +566,127 @@ def test_get_metabase_sql_questions_filters(self, mock_get_request): {'id': 1, 'name': 'Resource 1', 'type': 'question', 'updated_at': '2025-08-01T18:20:49.005658Z'} ] + @mock.patch('ckanext.in_app_reporting.utils.metabase_get_request') + def test_get_metabase_sql_questions_new_mbql_format(self, mock_get_request, app): + """Test get_metabase_sql_questions with new MBQL format (stages with native SQL)""" + mock_get_request.return_value = [ + { + 'id': 1, + 'name': 'MBQL Card 1', + 'type': 'question', + 'updated_at': '2025-08-01T18:20:49.005658Z', + 'collection_id': 1, + 'table_id': None, + 'dataset_query': { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT * FROM "bee42093-2c03-49f3-b185-200e745ec892" WHERE "Year" < \'2026\'' + } + ] + } + }, + { + 'id': 2, + 'name': 'MBQL Card 2', + 'type': 'question', + 'updated_at': '2025-08-02T18:20:49.005658Z', + 'collection_id': 1, + 'table_id': None, + 'dataset_query': { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT * FROM "different-resource-id" WHERE status = "active"' + } + ] + } + } + ] + + with app.flask_app.app_context(): + with mock.patch('ckanext.in_app_reporting.utils.METABASE_DB_ID', '4'), \ + mock.patch('ckanext.in_app_reporting.utils.METABASE_SITE_URL', 'https://example.com'), \ + mock.patch('ckanext.in_app_reporting.utils.collection_ids', ['1']), \ + mock.patch('ckan.plugins.toolkit.get_action') as mock_get_action: + + mock_get_action.return_value.side_effect = toolkit.ObjectNotFound() + + # Set user in flask.g + import ckan.plugins.toolkit as tk + user = factories.User() + tk.g.user = user['name'] + tk.g.userobj = model.User.get(user['id']) + + result = utils.get_metabase_sql_questions('bee42093-2c03-49f3-b185-200e745ec892') + + mock_get_request.assert_called_once_with('https://example.com/api/card?f=database&model_id=4') + assert len(result) == 1 + assert result[0]['id'] == 1 + assert result[0]['name'] == 'MBQL Card 1' + + @mock.patch('ckanext.in_app_reporting.utils.metabase_get_request') + def test_get_metabase_sql_questions_both_formats(self, mock_get_request, app): + """Test get_metabase_sql_questions handles both old and new formats""" + mock_get_request.return_value = [ + { + 'id': 1, + 'name': 'Old Format Card', + 'type': 'question', + 'updated_at': '2025-08-01T18:20:49.005658Z', + 'collection_id': 1, + 'table_id': None, + 'dataset_query': { + 'native': { + 'query': 'SELECT * FROM "test-resource-id"' + } + } + }, + { + 'id': 2, + 'name': 'New Format Card', + 'type': 'question', + 'updated_at': '2025-08-02T18:20:49.005658Z', + 'collection_id': 1, + 'table_id': None, + 'dataset_query': { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT * FROM "test-resource-id" WHERE status = "active"' + } + ] + } + } + ] + + with app.flask_app.app_context(): + with mock.patch('ckanext.in_app_reporting.utils.METABASE_DB_ID', '4'), \ + mock.patch('ckanext.in_app_reporting.utils.METABASE_SITE_URL', 'https://example.com'), \ + mock.patch('ckanext.in_app_reporting.utils.collection_ids', ['1']), \ + mock.patch('ckan.plugins.toolkit.get_action') as mock_get_action: + + mock_get_action.return_value.side_effect = toolkit.ObjectNotFound() + + # Set user in flask.g + import ckan.plugins.toolkit as tk + user = factories.User() + tk.g.user = user['name'] + tk.g.userobj = model.User.get(user['id']) + + result = utils.get_metabase_sql_questions('test-resource-id') + + mock_get_request.assert_called_once_with('https://example.com/api/card?f=database&model_id=4') + assert len(result) == 2 + assert result[0]['id'] == 2 # Sorted by type, then name + assert result[1]['id'] == 1 + @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "in_app_reporting") @@ -744,6 +946,72 @@ def test_get_metabase_chart_list_with_resource_id_match(self, mock_get_request): assert result[1]['id'] == 1 assert result[1]['name'] == 'SQL Chart A' + @mock.patch('ckanext.in_app_reporting.utils.metabase_get_request') + def test_get_metabase_chart_list_with_new_mbql_format(self, mock_get_request, app): + """Test get_metabase_chart_list with new MBQL format (stages with native SQL)""" + mock_get_request.return_value = [ + { + 'id': 1, + 'entity_id': 'card-1', + 'name': 'MBQL Chart A', + 'type': 'question', + 'updated_at': '2025-08-01T18:20:49.005658Z', + 'collection_id': 1, + 'table_id': None, + 'dataset_query': { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT * FROM "resource-123" WHERE status = "active"' + } + ] + } + }, + { + 'id': 2, + 'entity_id': 'card-2', + 'name': 'MBQL Chart B', + 'type': 'question', + 'updated_at': '2025-08-02T18:20:49.005658Z', + 'collection_id': 1, + 'table_id': None, + 'dataset_query': { + 'lib/type': 'mbql/query', + 'database': 60, + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT * FROM "resource-456" WHERE status = "active"' + } + ] + } + } + ] + + with app.flask_app.app_context(): + with mock.patch('ckanext.in_app_reporting.utils.METABASE_SITE_URL', 'https://example.com'), \ + mock.patch('ckanext.in_app_reporting.utils.METABASE_DB_ID', '4'), \ + mock.patch('ckanext.in_app_reporting.utils.collection_ids', ['1']), \ + mock.patch('ckan.plugins.toolkit.get_action') as mock_get_action: + + mock_get_action.return_value.side_effect = toolkit.ObjectNotFound() + + # Set user in flask.g + import ckan.plugins.toolkit as tk + user = factories.User() + tk.g.user = user['name'] + tk.g.userobj = model.User.get(user['id']) + + result = utils.get_metabase_chart_list(123, 'resource-123') + + mock_get_request.assert_called_once_with('https://example.com/api/card?f=database&model_id=4') + assert len(result) == 1 + assert result[0]['id'] == 1 + assert result[0]['name'] == 'MBQL Chart A' + assert result[0]['text'] == 'MBQL Chart A' + @mock.patch('ckanext.in_app_reporting.utils.metabase_get_request') def test_get_metabase_chart_list_no_api_response(self, mock_get_request): """Test get_metabase_chart_list when API returns no response""" diff --git a/ckanext/in_app_reporting/utils.py b/ckanext/in_app_reporting/utils.py index b6a473f..26ba9eb 100644 --- a/ckanext/in_app_reporting/utils.py +++ b/ckanext/in_app_reporting/utils.py @@ -279,6 +279,53 @@ def get_metabase_cards_by_table_id(table_id): return matching_cards +def _extract_native_sql_from_dataset_query(dataset_query: dict) -> str: + """ + Extract native SQL query from Metabase dataset_query, handling both old and new formats. + + New format (MBQL): + dataset_query: { + 'stages': [ + { + 'lib/type': 'mbql.stage/native', + 'native': 'SELECT ...' + } + ] + } + + Old format: + dataset_query: { + 'native': { + 'query': 'SELECT ...' + } + } + + Args: + dataset_query: The dataset_query dictionary from a Metabase card + + Returns: + The SQL query string, or empty string if not found + """ + if not dataset_query: + return '' + + # Check new format first (stages with native SQL) + stages = dataset_query.get('stages', []) + for stage in stages: + native_sql = stage.get('native', '') + if native_sql and isinstance(native_sql, str): + return native_sql + + # Fall back to old format (native.query) + native = dataset_query.get('native', {}) + if isinstance(native, dict): + return native.get('query', '') + elif isinstance(native, str): + return native + + return '' + + def get_metabase_sql_questions(resource_id): """ Get Metabase SQL questions that reference a specific resource ID. @@ -303,7 +350,9 @@ def get_metabase_sql_questions(resource_id): return matching_cards for card in card_results: if str(card.get('collection_id')) in metabase_mapping['collection_ids'] and not card.get('table_id'): - if resource_id in card.get('dataset_query', {}).get('native',{}).get('query', ''): + dataset_query = card.get('dataset_query', {}) + native_sql = _extract_native_sql_from_dataset_query(dataset_query) + if resource_id in native_sql: matching_cards.append({ 'id': card.get('id'), 'name': card.get('name'), @@ -382,15 +431,18 @@ def get_metabase_chart_list(table_id, resource_id): 'updated_at': card.get('updated_at'), 'text': card.get('name') }) - elif not card.get('table_id') and resource_id in card.get('dataset_query', {}).get('native',{}).get('query', ''): - matching_cards.append({ - 'id': card.get('id'), - 'entity_id': card.get('entity_id'), - 'name': card.get('name'), - 'type': card.get('type'), - 'updated_at': card.get('updated_at'), - 'text': card.get('name') - }) + elif not card.get('table_id'): + dataset_query = card.get('dataset_query', {}) + native_sql = _extract_native_sql_from_dataset_query(dataset_query) + if resource_id in native_sql: + matching_cards.append({ + 'id': card.get('id'), + 'entity_id': card.get('entity_id'), + 'name': card.get('name'), + 'type': card.get('type'), + 'updated_at': card.get('updated_at'), + 'text': card.get('name') + }) matching_cards.sort(key=lambda card: (card['updated_at']), reverse=True) return matching_cards