import json
import logging
import os
from typing import Optional


def xml_to_json(
    xml_path: str,
    output_path: Optional[str] = None
) -> str:
    """Transform an XML file into JSON and write it next to (or under) a target folder.

    The XML document is parsed into a dictionary with ``xmltodict`` and
    serialized with ``json.dumps``. The result is written to
    ``<output_path>/json/<xml file stem>.json``.

    Args:
        xml_path: Path of the XML file to convert.
        output_path: Folder under which the ``json`` sub-folder is created.
            When ``None``, the folder containing ``xml_path`` is used.

    Returns:
        The JSON document as a string.

    Raises:
        FileNotFoundError: If ``xml_path`` does not exist (logged, then re-raised).
        Exception: Any other error raised while reading or parsing the XML
            file is logged and re-raised unchanged.

    Note:
        A failure while *writing* the JSON file is logged but not raised; the
        JSON string is still returned in that case.
    """
    # Read the raw XML text. Errors are logged and re-raised: continuing
    # without parsed data would only fail later with a confusing
    # UnboundLocalError.
    try:
        with open(xml_path) as xml_file:
            xml_text = xml_file.read()
    except FileNotFoundError as error:
        logging.error("Error: %s (XML file not found)", error)
        raise
    except Exception as error:
        logging.error("Error: %s", error)
        raise

    # Imported here rather than at module level so that merely importing this
    # module (e.g. while Airflow parses DAG files) does not load xmltodict.
    import xmltodict

    # Parse the XML text into a Python dictionary using the "xmltodict" module.
    try:
        data_dict = xmltodict.parse(xml_text)
    except Exception as error:
        logging.error("Error: %s", error)
        raise

    # Generate the json_data object using json.dumps().
    json_data = json.dumps(data_dict)

    # Default to the original XML folder when no output_path is provided.
    # os.path.join is used (instead of f'{output_path}/json') so that an empty
    # dirname -- a bare file name such as "data.xml" -- resolves to the
    # relative folder "json" rather than the filesystem root "/json".
    if output_path is None:
        output_path = os.path.dirname(xml_path)
    output_directory = os.path.join(output_path, 'json')

    # Set the name of the json file to the name of the XML file provided.
    file_name = os.path.splitext(os.path.basename(xml_path))[0]

    # Create the output directory if it doesn't exist (no check-then-create race).
    os.makedirs(output_directory, exist_ok=True)

    # Write the contents of the JSON file into the output folder path.
    file_path = os.path.join(output_directory, f'{file_name}.json')
    try:
        with open(file_path, "w") as json_file:
            json_file.write(json_data)
    except OSError as error:
        # Best-effort write, as in the original: report the failure but still
        # hand the JSON string back to the caller.
        logging.error("Error: %s", error)

    return json_data