From 7ed521f11a0051133f6d8fbd62274bf6362189e4 Mon Sep 17 00:00:00 2001 From: John Samuel Date: Tue, 7 Apr 2026 11:34:21 +0200 Subject: [PATCH 1/4] Potential fix for code scanning alert no. 8: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- shexstatements/shexfromcsv.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/shexstatements/shexfromcsv.py b/shexstatements/shexfromcsv.py index 606c242..faaef3f 100644 --- a/shexstatements/shexfromcsv.py +++ b/shexstatements/shexfromcsv.py @@ -72,14 +72,17 @@ def generate_shex_from_csv(filepath, delim=",", skip_header=False, filename=True data = "" if filename: # Validate and normalize the path while allowing relative subdirectories. - normalized_path = os.path.normpath(filepath.strip()) - if not normalized_path: + raw_path = filepath.strip() + if not raw_path: raise ValueError("Empty filename is not allowed") - if os.path.isabs(normalized_path): - raise ValueError("Absolute paths are not allowed") - if normalized_path == ".." or normalized_path.startswith(".." + os.path.sep): - raise ValueError("Path traversal is not allowed") - with open(normalized_path) as csvfile: + normalized_path = os.path.normpath(raw_path) + # Resolve the path against a safe base directory (current working directory). + base_dir = os.getcwd() + candidate_path = os.path.normpath(os.path.join(base_dir, normalized_path)) + # Ensure the final path is within the base directory to prevent path traversal. + if os.path.commonpath([base_dir, candidate_path]) != os.path.abspath(base_dir): + raise ValueError("Access to the specified file path is not allowed") + with open(candidate_path) as csvfile: csvreader = csv.reader(csvfile, delimiter=delim) rowno = 0 for row in csvreader: From 6c794aa79b224d2dd6bc37d7156b9b3068cadbd7 Mon Sep 17 00:00:00 2001 From: John Samuel Date: Tue, 7 Apr 2026 11:39:47 +0200 Subject: [PATCH 2/4] Potential fix for code scanning alert no. 5: Uncontrolled data used in path expression Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- shexstatements/shexfromspreadsheet.py | 49 +++++++++++++++++++-------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/shexstatements/shexfromspreadsheet.py b/shexstatements/shexfromspreadsheet.py index cf3cd63..9562ab6 100644 --- a/shexstatements/shexfromspreadsheet.py +++ b/shexstatements/shexfromspreadsheet.py @@ -6,6 +6,7 @@ from os import remove from os.path import splitext +import tempfile from odf.opendocument import load from odf.table import TableCell, TableRow @@ -46,13 +47,23 @@ def generate_shex_from_spreadsheet(filepath, skip_header=False, stream=None): if(file_extension in {".xlsx", ".xlsm", ".xltx", ".xltm"}): wb = None + temp_path = None if stream is not None: - with open("tmp" + filepath, "wb") as sf: - sf.write(stream) - sf.close() - filepath = "tmp" + filepath + fd, temp_path = tempfile.mkstemp(suffix=file_extension) + try: + with open(fd, "wb") as sf: + sf.write(stream) + except TypeError: + # Fallback for environments where opening by fd is not supported + import os + os.close(fd) + with open(temp_path, "wb") as sf: + sf.write(stream) + filepath_to_open = temp_path + else: + filepath_to_open = filepath - wb = load_workbook(filepath) + wb = load_workbook(filepath_to_open) for ws in wb.worksheets: for i in range(1, ws.max_row+1): line = list() @@ -63,8 +74,8 @@ def generate_shex_from_spreadsheet(filepath, skip_header=False, stream=None): line = "|".join(line) data = data + line + "\n" - if stream is not None: - remove(filepath) + if stream is not None and temp_path is not None: + remove(temp_path) elif(file_extension in {".xls"}): wb = None @@ -84,13 +95,23 @@ def generate_shex_from_spreadsheet(filepath, skip_header=False, stream=None): elif(file_extension in {".ods"}): wb = None + temp_path = None if stream is not None: - with open("tmp" + filepath, "wb") as sf: - sf.write(stream) - sf.close() - filepath = "tmp" + filepath + fd, temp_path = tempfile.mkstemp(suffix=file_extension) + try: + with open(fd, "wb") as sf: + sf.write(stream) + except TypeError: + # Fallback for environments where opening by fd is not supported + import os + os.close(fd) + with open(temp_path, "wb") as sf: + sf.write(stream) + filepath_to_open = temp_path + else: + filepath_to_open = filepath - wb = load(filepath) + wb = load(filepath_to_open) wb = wb.spreadsheet rows = wb.getElementsByType(TableRow) for row in rows: @@ -101,8 +122,8 @@ def generate_shex_from_spreadsheet(filepath, skip_header=False, stream=None): line.append(str(cell)) data = data+"|".join(line) + "\n" - if stream is not None: - remove(filepath) + if stream is not None and temp_path is not None: + remove(temp_path) shexstatement = CSV.generate_shex_from_data_string(data) except Exception as e: From 45b18f92ca9f05707de7b316252102b53156b38e Mon Sep 17 00:00:00 2001 From: John Samuel Date: Tue, 7 Apr 2026 11:53:04 +0200 Subject: [PATCH 3/4] Potential fix for pull request finding 'CodeQL / Uncontrolled data used in path expression' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- shexstatements/shexfromcsv.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/shexstatements/shexfromcsv.py b/shexstatements/shexfromcsv.py index faaef3f..671cc24 100644 --- a/shexstatements/shexfromcsv.py +++ b/shexstatements/shexfromcsv.py @@ -75,14 +75,18 @@ def generate_shex_from_csv(filepath, delim=",", skip_header=False, filename=True raw_path = filepath.strip() if not raw_path: raise ValueError("Empty filename is not allowed") + # Normalize the user-supplied path. normalized_path = os.path.normpath(raw_path) + # Disallow absolute paths supplied by the caller. + if os.path.isabs(normalized_path): + raise ValueError("Absolute paths are not allowed") # Resolve the path against a safe base directory (current working directory). - base_dir = os.getcwd() - candidate_path = os.path.normpath(os.path.join(base_dir, normalized_path)) + base_dir = os.path.realpath(os.getcwd()) + real_candidate = os.path.realpath(os.path.join(base_dir, normalized_path)) # Ensure the final path is within the base directory to prevent path traversal. - if os.path.commonpath([base_dir, candidate_path]) != os.path.abspath(base_dir): + if os.path.commonpath([base_dir, real_candidate]) != base_dir: raise ValueError("Access to the specified file path is not allowed") - with open(candidate_path) as csvfile: + with open(real_candidate) as csvfile: csvreader = csv.reader(csvfile, delimiter=delim) rowno = 0 for row in csvreader: From 030cafd603b78c08c66a43c207371dd18918fa52 Mon Sep 17 00:00:00 2001 From: John Samuel Date: Tue, 7 Apr 2026 11:55:07 +0200 Subject: [PATCH 4/4] Potential fix for pull request finding 'CodeQL / Uncontrolled data used in path expression' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- shexstatements/shexfromcsv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shexstatements/shexfromcsv.py b/shexstatements/shexfromcsv.py index 671cc24..0e49f3e 100644 --- a/shexstatements/shexfromcsv.py +++ b/shexstatements/shexfromcsv.py @@ -80,8 +80,8 @@ def generate_shex_from_csv(filepath, delim=",", skip_header=False, filename=True # Disallow absolute paths supplied by the caller. if os.path.isabs(normalized_path): raise ValueError("Absolute paths are not allowed") - # Resolve the path against a safe base directory (current working directory). - base_dir = os.path.realpath(os.getcwd()) + # Resolve the path against a safe base directory (directory of this module). + base_dir = os.path.dirname(os.path.realpath(__file__)) real_candidate = os.path.realpath(os.path.join(base_dir, normalized_path)) # Ensure the final path is within the base directory to prevent path traversal. if os.path.commonpath([base_dir, real_candidate]) != base_dir: