From 14e6d8d5d6762460a76bb05a2b8bd1680857d54d Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 7 Apr 2026 10:40:05 -0700 Subject: [PATCH 1/4] Fix __file__ NameError in power run scripts on Databricks Signed-off-by: Niranjan Artal --- nds-h/nds_h_power.py | 6 +++++- nds/nds_power.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/nds-h/nds_h_power.py b/nds-h/nds_h_power.py index b6a4cff..a3c59ca 100644 --- a/nds-h/nds_h_power.py +++ b/nds-h/nds_h_power.py @@ -42,7 +42,11 @@ # Python doesn't automatically include sibling directories in the import path. # We need to explicitly add the utils directory to sys.path to import shared utilities. -parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +# Note: __file__ is not defined when Databricks runs scripts via exec(), so fall back to sys.argv[0]. +try: + parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +except NameError: + parent_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..')) utils_dir = os.path.join(parent_dir, 'utils') if utils_dir not in sys.path: sys.path.insert(0, utils_dir) diff --git a/nds/nds_power.py b/nds/nds_power.py index 8e82dd1..d338149 100644 --- a/nds/nds_power.py +++ b/nds/nds_power.py @@ -46,7 +46,11 @@ # Python doesn't automatically include sibling directories in the import path. # We need to explicitly add the utils directory to sys.path to import shared utilities. -parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +# Note: __file__ is not defined when Databricks runs scripts via exec(), so fall back to sys.argv[0]. +try: + parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +except NameError: + parent_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..')) utils_dir = os.path.join(parent_dir, 'utils') if utils_dir not in sys.path: sys.path.insert(0, utils_dir) From db03bfd08a5a4af35413b49edac9697f7007a2e3 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 7 Apr 2026 11:32:31 -0700 Subject: [PATCH 2/4] Address review comments Signed-off-by: Niranjan Artal --- nds-h/nds_h_gen_data.py | 12 ++++++++++-- nds-h/nds_h_gen_query_stream.py | 9 ++++++++- nds-h/nds_h_power.py | 7 +++++-- nds/nds_maintenance.py | 9 ++++++++- nds/nds_power.py | 7 +++++-- 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/nds-h/nds_h_gen_data.py b/nds-h/nds_h_gen_data.py index 8293812..d411f84 100644 --- a/nds-h/nds_h_gen_data.py +++ b/nds-h/nds_h_gen_data.py @@ -36,8 +36,16 @@ import subprocess import shutil -#For adding utils to path -parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +# Python doesn't automatically include sibling directories in the import path. +# We need to explicitly add the utils directory to sys.path to import shared utilities. +# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), +# so fall back to inspect to retrieve the filename from the compiled bytecode. +try: + parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +except NameError: + import inspect + _this_file = inspect.getfile(inspect.currentframe()) + parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) utils_dir = os.path.join(parent_dir, 'utils') sys.path.insert(0, utils_dir) diff --git a/nds-h/nds_h_gen_query_stream.py b/nds-h/nds_h_gen_query_stream.py index fad9380..20f91e7 100644 --- a/nds-h/nds_h_gen_query_stream.py +++ b/nds-h/nds_h_gen_query_stream.py @@ -34,7 +34,14 @@ import subprocess import sys -parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), +# so fall back to inspect to retrieve the filename from the compiled bytecode. +try: + parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +except NameError: + import inspect + _this_file = inspect.getfile(inspect.currentframe()) + parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) utils_dir = os.path.join(parent_dir, 'utils') sys.path.insert(0, utils_dir) diff --git a/nds-h/nds_h_power.py b/nds-h/nds_h_power.py index a3c59ca..add193e 100644 --- a/nds-h/nds_h_power.py +++ b/nds-h/nds_h_power.py @@ -42,11 +42,14 @@ # Python doesn't automatically include sibling directories in the import path. # We need to explicitly add the utils directory to sys.path to import shared utilities. -# Note: __file__ is not defined when Databricks runs scripts via exec(), so fall back to sys.argv[0]. +# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), +# so fall back to inspect to retrieve the filename from the compiled bytecode. try: parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) except NameError: - parent_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..')) + import inspect + _this_file = inspect.getfile(inspect.currentframe()) + parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) utils_dir = os.path.join(parent_dir, 'utils') if utils_dir not in sys.path: sys.path.insert(0, utils_dir) diff --git a/nds/nds_maintenance.py b/nds/nds_maintenance.py index 295ca4d..4ac380c 100644 --- a/nds/nds_maintenance.py +++ b/nds/nds_maintenance.py @@ -45,7 +45,14 @@ # Python doesn't automatically include sibling directories in the import path. # We need to explicitly add the utils directory to sys.path to import shared utilities. -parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), +# so fall back to inspect to retrieve the filename from the compiled bytecode. +try: + parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +except NameError: + import inspect + _this_file = inspect.getfile(inspect.currentframe()) + parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) utils_dir = os.path.join(parent_dir, 'utils') if utils_dir not in sys.path: sys.path.insert(0, utils_dir) diff --git a/nds/nds_power.py b/nds/nds_power.py index d338149..a7e3d59 100644 --- a/nds/nds_power.py +++ b/nds/nds_power.py @@ -46,11 +46,14 @@ # Python doesn't automatically include sibling directories in the import path. # We need to explicitly add the utils directory to sys.path to import shared utilities. -# Note: __file__ is not defined when Databricks runs scripts via exec(), so fall back to sys.argv[0]. +# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), +# so fall back to inspect to retrieve the filename from the compiled bytecode. try: parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) except NameError: - parent_dir = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..')) + import inspect + _this_file = inspect.getfile(inspect.currentframe()) + parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) utils_dir = os.path.join(parent_dir, 'utils') if utils_dir not in sys.path: sys.path.insert(0, utils_dir) From 505f0bc40719b1a9d6c33c6bb14a9549ef6edb32 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 7 Apr 2026 11:40:24 -0700 Subject: [PATCH 3/4] update license header --- nds-h/nds_h_gen_data.py | 2 +- nds-h/nds_h_gen_query_stream.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nds-h/nds_h_gen_data.py b/nds-h/nds_h_gen_data.py index d411f84..bbbe271 100644 --- a/nds-h/nds_h_gen_data.py +++ b/nds-h/nds_h_gen_data.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nds-h/nds_h_gen_query_stream.py b/nds-h/nds_h_gen_query_stream.py index 20f91e7..1fc8e8f 100644 --- a/nds-h/nds_h_gen_query_stream.py +++ b/nds-h/nds_h_gen_query_stream.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); From b7e678a7721ea7104e4836763349336ab23bd34b Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Wed, 8 Apr 2026 16:07:06 -0700 Subject: [PATCH 4/4] address review comments --- nds-h/nds_h_gen_data.py | 14 ++----------- nds-h/nds_h_gen_query_stream.py | 12 ++--------- nds-h/nds_h_power.py | 15 ++------------ nds-h/setup_utils.py | 36 +++++++++++++++++++++++++++++++++ nds/nds_maintenance.py | 15 ++------------ nds/nds_power.py | 15 ++------------ nds/setup_utils.py | 36 +++++++++++++++++++++++++++++++++ 7 files changed, 82 insertions(+), 61 deletions(-) create mode 100644 nds-h/setup_utils.py create mode 100644 nds/setup_utils.py diff --git a/nds-h/nds_h_gen_data.py b/nds-h/nds_h_gen_data.py index bbbe271..7197d99 100644 --- a/nds-h/nds_h_gen_data.py +++ b/nds-h/nds_h_gen_data.py @@ -36,18 +36,8 @@ import subprocess import shutil -# Python doesn't automatically include sibling directories in the import path. -# We need to explicitly add the utils directory to sys.path to import shared utilities. -# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), -# so fall back to inspect to retrieve the filename from the compiled bytecode. -try: - parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -except NameError: - import inspect - _this_file = inspect.getfile(inspect.currentframe()) - parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) -utils_dir = os.path.join(parent_dir, 'utils') -sys.path.insert(0, utils_dir) +from setup_utils import add_utils_to_sys_path +add_utils_to_sys_path() from check import check_build_nds_h, check_version, get_abs_path, get_dir_size, parallel_value_type, valid_range diff --git a/nds-h/nds_h_gen_query_stream.py b/nds-h/nds_h_gen_query_stream.py index 1fc8e8f..5f5eebf 100644 --- a/nds-h/nds_h_gen_query_stream.py +++ b/nds-h/nds_h_gen_query_stream.py @@ -34,16 +34,8 @@ import subprocess import sys -# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), -# so fall back to inspect to retrieve the filename from the compiled bytecode. -try: - parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -except NameError: - import inspect - _this_file = inspect.getfile(inspect.currentframe()) - parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) -utils_dir = os.path.join(parent_dir, 'utils') -sys.path.insert(0, utils_dir) +from setup_utils import add_utils_to_sys_path +add_utils_to_sys_path() from check import check_build_nds_h, check_version, get_abs_path diff --git a/nds-h/nds_h_power.py b/nds-h/nds_h_power.py index add193e..a49d864 100644 --- a/nds-h/nds_h_power.py +++ b/nds-h/nds_h_power.py @@ -40,19 +40,8 @@ import re import subprocess -# Python doesn't automatically include sibling directories in the import path. -# We need to explicitly add the utils directory to sys.path to import shared utilities. -# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), -# so fall back to inspect to retrieve the filename from the compiled bytecode. -try: - parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -except NameError: - import inspect - _this_file = inspect.getfile(inspect.currentframe()) - parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) -utils_dir = os.path.join(parent_dir, 'utils') -if utils_dir not in sys.path: - sys.path.insert(0, utils_dir) +from setup_utils import add_utils_to_sys_path +add_utils_to_sys_path() from spark_utils import setQueryName, clearQueryName from profiler import Profiler diff --git a/nds-h/setup_utils.py b/nds-h/setup_utils.py new file mode 100644 index 0000000..86cffe5 --- /dev/null +++ b/nds-h/setup_utils.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import inspect +import os +import sys + + +def add_utils_to_sys_path(): + """Add the sibling utils/ directory to sys.path so shared modules can be imported. + + Uses inspect.stack() to resolve the calling script's location from the bytecode, + which works both in standard execution and in Databricks exec(compile(...)) contexts + where __file__ is not defined. + """ + caller_file = inspect.stack()[1].filename + parent_dir = os.path.abspath(os.path.join(os.path.dirname(caller_file), '..')) + utils_dir = os.path.join(parent_dir, 'utils') + if utils_dir not in sys.path: + sys.path.insert(0, utils_dir) diff --git a/nds/nds_maintenance.py b/nds/nds_maintenance.py index 4ac380c..925e88e 100644 --- a/nds/nds_maintenance.py +++ b/nds/nds_maintenance.py @@ -43,19 +43,8 @@ from nds_schema import get_maintenance_schemas from nds_power import register_delta_tables -# Python doesn't automatically include sibling directories in the import path. -# We need to explicitly add the utils directory to sys.path to import shared utilities. -# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), -# so fall back to inspect to retrieve the filename from the compiled bytecode. -try: - parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -except NameError: - import inspect - _this_file = inspect.getfile(inspect.currentframe()) - parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) -utils_dir = os.path.join(parent_dir, 'utils') -if utils_dir not in sys.path: - sys.path.insert(0, utils_dir) +from setup_utils import add_utils_to_sys_path +add_utils_to_sys_path() from spark_utils import setQueryName, clearQueryName INSERT_FUNCS = [ diff --git a/nds/nds_power.py b/nds/nds_power.py index a7e3d59..4514798 100644 --- a/nds/nds_power.py +++ b/nds/nds_power.py @@ -44,19 +44,8 @@ from check import check_json_summary_folder, check_query_subset_exists, check_version from nds_schema import get_schemas -# Python doesn't automatically include sibling directories in the import path. -# We need to explicitly add the utils directory to sys.path to import shared utilities. -# Note: __file__ is not defined when Databricks runs scripts via exec(compile(...)), -# so fall back to inspect to retrieve the filename from the compiled bytecode. -try: - parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -except NameError: - import inspect - _this_file = inspect.getfile(inspect.currentframe()) - parent_dir = os.path.abspath(os.path.join(os.path.dirname(_this_file), '..')) -utils_dir = os.path.join(parent_dir, 'utils') -if utils_dir not in sys.path: - sys.path.insert(0, utils_dir) +from setup_utils import add_utils_to_sys_path +add_utils_to_sys_path() from spark_utils import setQueryName, clearQueryName from profiler import Profiler diff --git a/nds/setup_utils.py b/nds/setup_utils.py new file mode 100644 index 0000000..86cffe5 --- /dev/null +++ b/nds/setup_utils.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import inspect +import os +import sys + + +def add_utils_to_sys_path(): + """Add the sibling utils/ directory to sys.path so shared modules can be imported. + + Uses inspect.stack() to resolve the calling script's location from the bytecode, + which works both in standard execution and in Databricks exec(compile(...)) contexts + where __file__ is not defined. + """ + caller_file = inspect.stack()[1].filename + parent_dir = os.path.abspath(os.path.join(os.path.dirname(caller_file), '..')) + utils_dir = os.path.join(parent_dir, 'utils') + if utils_dir not in sys.path: + sys.path.insert(0, utils_dir)