Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions koku/koku/reportdb_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ def get_table_check_sql(self, table_name: str, schema_name: str):
pass

@abstractmethod
def get_delete_day_by_manifestid_sql(
def get_delete_by_manifestid_sql(
self, schema_name: str, table_name: str, source: str, year: str, month: str, manifestid: str
):
"""Return the SQL to delete data where manifestid doesn't match"""
pass

@abstractmethod
def get_delete_day_by_reportnumhours_sql(
def get_delete_by_reportnumhours_sql(
self,
schema_name: str,
table_name: str,
Expand Down
24 changes: 22 additions & 2 deletions koku/koku/reportdb_accessor_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def get_table_check_sql(self, table_name: str, schema_name: str):
f"WHERE table_name = '{table_name}' AND table_schema = '{schema_name}'"
)

def get_delete_day_by_manifestid_sql(
def get_delete_by_manifestid_sql(
self, schema_name: str, table_name: str, source: str, year: str, month: str, manifestid: str
):
"""Return the SQL to delete data where manifestid doesn't match."""
Expand All @@ -81,7 +81,27 @@ def get_delete_day_by_manifestid_sql(
AND manifestid != '{manifestid}'
"""

def get_delete_day_by_reportnumhours_sql(
def get_delete_by_manifestid_and_date_sql(
self,
schema_name: str,
table_name: str,
source: str,
year: str,
month: str,
manifestid: str,
processing_date: str,
):
"""Return the SQL to delete data where manifestid doesn't match, scoped to dates >= processing_date."""
return f"""
DELETE FROM "{schema_name}"."{table_name}"
WHERE source = '{source}'
AND year = '{year}'
AND month = '{month}'
AND manifestid != '{manifestid}'
AND {DATE_COLUMN} >= DATE '{processing_date}'
"""
Comment on lines +95 to +102
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This method constructs a raw SQL query using an f-string, which is a potential SQL injection vulnerability. Although the values might be system-generated, it is a security best practice to use parameterized queries. Please consider modifying this method to return a SQL template and a list of parameters, and then use cursor.execute(sql, params) at the call site to safely execute the query. This would provide protection against SQL injection.


def get_delete_by_reportnumhours_sql(
self,
schema_name: str,
table_name: str,
Expand Down
4 changes: 2 additions & 2 deletions koku/koku/reportdb_accessor_trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def get_partition_create_sql(
# This method is not used for Trino, but must exist to satisfy the abstract base class
return ""

def get_delete_day_by_manifestid_sql(
def get_delete_by_manifestid_sql(
self, schema_name: str, table_name: str, source: str, year: str, month: str, manifestid: str
):
"""Trino delete by manifestid - not used, Trino uses S3 file deletion."""
Expand All @@ -95,7 +95,7 @@ def get_delete_day_by_manifestid_sql(
# This method exists only to satisfy the abstract base class
return ""

def get_delete_day_by_reportnumhours_sql(
def get_delete_by_reportnumhours_sql(
self,
schema_name: str,
table_name: str,
Expand Down
8 changes: 4 additions & 4 deletions koku/koku/test/test_reportdb_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,17 @@ def test_get_table_check_sql(self):
self.assertIn(self.table_name, sql)
self.assertIn(self.schema_name, sql)

def test_get_delete_day_by_manifestid_sql(self):
def test_get_delete_by_manifestid_sql(self):
"""Test delete by manifest ID SQL generation."""
sql = self.accessor.get_delete_day_by_manifestid_sql(
sql = self.accessor.get_delete_by_manifestid_sql(
self.schema_name, self.table_name, self.source_uuid, "2024", "01", "123"
)
self.assertIn("DELETE FROM", sql)
self.assertIn("manifestid != '123'", sql)

def test_get_delete_day_by_reportnumhours_sql(self):
def test_get_delete_by_reportnumhours_sql(self):
"""Test delete by reportnumhours SQL generation."""
sql = self.accessor.get_delete_day_by_reportnumhours_sql(
sql = self.accessor.get_delete_by_reportnumhours_sql(
self.schema_name,
self.table_name,
self.source_uuid,
Expand Down
45 changes: 35 additions & 10 deletions koku/masu/database/aws_report_db_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def populate_line_item_daily_summary_table_trino(self, start_date, end_date, sou

"""
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/aws/reporting_awscostentrylineitem_daily_summary.sql"
"masu.database", f"{self.get_sql_folder_name()}/aws/reporting_awscostentrylineitem_daily_summary.sql"
)
sql = sql.decode("utf-8")
uuid_str = str(uuid.uuid4()).replace("-", "_")
Expand Down Expand Up @@ -174,7 +174,7 @@ def populate_ocp_on_aws_ui_summary_tables_trino(

for table_name in tables:
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/aws/openshift/ui_summary/{table_name}.sql"
"masu.database", f"{self.get_sql_folder_name()}/aws/openshift/ui_summary/{table_name}.sql"
)
sql = sql.decode("utf-8")
sql_params = {
Expand Down Expand Up @@ -277,12 +277,7 @@ def populate_ocp_on_aws_cost_daily_summary_trino(
bill_id,
report_period_id,
)
managed_path = f"{self.trino_sql_folder_name}/aws/openshift/populate_daily_summary"
prepare_sql, prepare_params = sql_metadata.prepare_template(
f"{managed_path}/0_prepare_daily_summary_tables.sql"
)
LOG.info(log_json(msg="Preparing tables for OCP on AWS flow", **prepare_params))
self._execute_trino_multipart_sql_query(prepare_sql, bind_params=prepare_params)
managed_path = f"{self.get_sql_folder_name()}/aws/openshift/populate_daily_summary"
self.delete_ocp_on_aws_hive_partition_by_day(
sql_metadata.days_tup,
sql_metadata.cloud_provider_uuid,
Expand Down Expand Up @@ -444,7 +439,7 @@ def get_openshift_on_cloud_matched_tags_trino(
):
"""Return a list of matched tags."""
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/aws/openshift/reporting_ocpaws_matched_tags.sql"
"masu.database", f"{self.get_sql_folder_name()}/aws/openshift/reporting_ocpaws_matched_tags.sql"
)
sql = sql.decode("utf-8")

Expand Down Expand Up @@ -511,7 +506,7 @@ def populate_ec2_compute_summary_table_trino(self, source_uuid, start_date, bill
}
LOG.info(log_json(msg=msg, context=context))

sql = pkgutil.get_data("masu.database", f"{self.trino_sql_folder_name}/aws/{table_name}.sql")
sql = pkgutil.get_data("masu.database", f"{self.get_sql_folder_name()}/aws/{table_name}.sql")
sql = sql.decode("utf-8")
sql_params = {
"schema": self.schema,
Expand All @@ -523,3 +518,33 @@ def populate_ec2_compute_summary_table_trino(self, source_uuid, start_date, bill
}

self._execute_trino_raw_sql_query(sql, sql_params=sql_params, log_ref=f"{table_name}.sql")

def delete_self_hosted_data_by_source(self, provider_uuid):
"""Delete data from all self-hosted tables by source UUID (for on-prem).

This deletes data from the line item tables when a source is deleted.

Args:
provider_uuid: The provider UUID to delete data for
"""
from reporting.provider.aws.self_hosted_models import get_self_hosted_models

provider_uuid_str = str(provider_uuid)
total_deleted = 0

with schema_context(self.schema):
for model in get_self_hosted_models():
deleted_count, _ = model.objects.filter(source=provider_uuid_str).delete()

if deleted_count:
LOG.info(
log_json(
msg="deleted self-hosted data by source",
table=model._meta.db_table,
provider_uuid=provider_uuid_str,
deleted_count=deleted_count,
)
)
total_deleted += deleted_count

return total_deleted
Comment on lines +522 to +550
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The method delete_self_hosted_data_by_source is nearly identical in AWSReportDBAccessor, AzureReportDBAccessor, and GCPReportDBAccessor. To improve maintainability and reduce code duplication, consider moving this logic to a shared base class, such as ReportDBAccessorBase. The provider-specific get_self_hosted_models function could be defined as an abstract method in the base class that subclasses are required to implement.

43 changes: 34 additions & 9 deletions koku/masu/database/azure_report_db_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def populate_line_item_daily_summary_table_trino(self, start_date, end_date, sou

"""
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/azure/reporting_azurecostentrylineitem_daily_summary.sql"
"masu.database", f"{self.get_sql_folder_name()}/azure/reporting_azurecostentrylineitem_daily_summary.sql"
)
sql = sql.decode("utf-8")
uuid_str = str(uuid.uuid4()).replace("-", "_")
Expand Down Expand Up @@ -212,7 +212,7 @@ def populate_ocp_on_azure_ui_summary_tables_trino(

for table_name in tables:
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/azure/openshift/ui_summary/{table_name}.sql"
"masu.database", f"{self.get_sql_folder_name()}/azure/openshift/ui_summary/{table_name}.sql"
)
sql = sql.decode("utf-8")
sql_params = {
Expand Down Expand Up @@ -280,12 +280,7 @@ def populate_ocp_on_azure_cost_daily_summary_trino(
bill_id,
report_period_id,
)
managed_path = f"{self.trino_sql_folder_name}/azure/openshift/populate_daily_summary"
prepare_sql, prepare_params = sql_metadata.prepare_template(
f"{managed_path}/0_prepare_daily_summary_tables.sql"
)
LOG.info(log_json(msg="Preparing tables for OCP on Azure flow", **prepare_params))
self._execute_trino_multipart_sql_query(prepare_sql, bind_params=prepare_params)
managed_path = f"{self.get_sql_folder_name()}/azure/openshift/populate_daily_summary"
self.delete_ocp_on_azure_hive_partition_by_day(
sql_metadata.days_tup,
sql_metadata.cloud_provider_uuid,
Expand Down Expand Up @@ -361,7 +356,7 @@ def get_openshift_on_cloud_matched_tags_trino(
):
"""Return a list of matched tags."""
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/azure/openshift/reporting_ocpazure_matched_tags.sql"
"masu.database", f"{self.get_sql_folder_name()}/azure/openshift/reporting_ocpazure_matched_tags.sql"
)
sql = sql.decode("utf-8")

Expand Down Expand Up @@ -445,3 +440,33 @@ def _get_matched_tags_strings(self, bill_id, azure_provider_uuid, ocp_provider_u
if matched_tags:
return [json.dumps(match).replace("{", "").replace("}", "") for match in matched_tags]
return matched_tags

def delete_self_hosted_data_by_source(self, provider_uuid):
"""Delete data from all self-hosted tables by source UUID (for on-prem).

This deletes data from the line item tables when a source is deleted.

Args:
provider_uuid: The provider UUID to delete data for
"""
from reporting.provider.azure.self_hosted_models import get_self_hosted_models

provider_uuid_str = str(provider_uuid)
total_deleted = 0

with schema_context(self.schema):
for model in get_self_hosted_models():
deleted_count, _ = model.objects.filter(source=provider_uuid_str).delete()

if deleted_count:
LOG.info(
log_json(
msg="deleted self-hosted data by source",
table=model._meta.db_table,
provider_uuid=provider_uuid_str,
deleted_count=deleted_count,
)
)
total_deleted += deleted_count

return total_deleted
45 changes: 35 additions & 10 deletions koku/masu/database/gcp_report_db_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def populate_line_item_daily_summary_table_trino(
"""

sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/gcp/reporting_gcpcostentrylineitem_daily_summary.sql"
"masu.database", f"{self.get_sql_folder_name()}/gcp/reporting_gcpcostentrylineitem_daily_summary.sql"
)
sql = sql.decode("utf-8")
uuid_str = str(uuid.uuid4()).replace("-", "_")
Expand All @@ -129,7 +129,7 @@ def populate_line_item_daily_summary_table_trino(

def fetch_invoice_month_dates(self, start_date, end_date, invoice_month, source_uuid):
"""Get extended valid date range for given invoice_month and start/end."""
sql = pkgutil.get_data("masu.database", f"{self.trino_sql_folder_name}/gcp/get_invoice_month_dates.sql")
sql = pkgutil.get_data("masu.database", f"{self.get_sql_folder_name()}/gcp/get_invoice_month_dates.sql")
sql = sql.decode("utf-8")
sql_params = {
"schema": self.schema,
Expand Down Expand Up @@ -314,12 +314,7 @@ def populate_ocp_on_gcp_cost_daily_summary_trino(
bill_id,
report_period_id,
)
managed_path = f"{self.trino_sql_folder_name}/gcp/openshift/populate_daily_summary/"
prepare_sql, prepare_params = sql_metadata.prepare_template(
f"{managed_path}/0_prepare_daily_summary_tables.sql", {"unattributed_storage": enable_unattributed_storage}
)
LOG.info(log_json(msg="Preparing tables for OCP on GCP flow", **prepare_params))
self._execute_trino_multipart_sql_query(prepare_sql, bind_params=prepare_params)
managed_path = f"{self.get_sql_folder_name()}/gcp/openshift/populate_daily_summary/"
self.delete_ocp_on_gcp_hive_partition_by_day(
sql_metadata.days_tup,
sql_metadata.cloud_provider_uuid,
Expand Down Expand Up @@ -377,7 +372,7 @@ def populate_ocp_on_gcp_ui_summary_tables_trino(

for table_name in tables:
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/gcp/openshift/ui_summary/{table_name}.sql"
"masu.database", f"{self.get_sql_folder_name()}/gcp/openshift/ui_summary/{table_name}.sql"
)
sql = sql.decode("utf-8")
sql_params = {
Expand Down Expand Up @@ -441,7 +436,7 @@ def get_openshift_on_cloud_matched_tags_trino(
"""Return a list of matched tags."""
invoice_month_date = kwargs.get("invoice_month_date")
sql = pkgutil.get_data(
"masu.database", f"{self.trino_sql_folder_name}/gcp/openshift/reporting_ocpgcp_matched_tags.sql"
"masu.database", f"{self.get_sql_folder_name()}/gcp/openshift/reporting_ocpgcp_matched_tags.sql"
)
sql = sql.decode("utf-8")

Expand Down Expand Up @@ -550,3 +545,33 @@ def check_for_matching_enabled_keys(self):
LOG.info(log_json(msg="no matching enabled keys for OCP on GCP", schema=self.schema))
return False
return True

def delete_self_hosted_data_by_source(self, provider_uuid):
"""Delete data from all self-hosted tables by source UUID (for on-prem).

This deletes data from the line item tables when a source is deleted.

Args:
provider_uuid: The provider UUID to delete data for
"""
from reporting.provider.gcp.self_hosted_models import get_self_hosted_models

provider_uuid_str = str(provider_uuid)
total_deleted = 0

with schema_context(self.schema):
for model in get_self_hosted_models():
deleted_count, _ = model.objects.filter(source=provider_uuid_str).delete()

if deleted_count:
LOG.info(
log_json(
msg="deleted self-hosted data by source",
table=model._meta.db_table,
provider_uuid=provider_uuid_str,
deleted_count=deleted_count,
)
)
total_deleted += deleted_count

return total_deleted
Loading
Loading