From 493613ae9f5956d3830721f3392f83ddf2061327 Mon Sep 17 00:00:00 2001 From: Yaron Dayagi Date: Mon, 23 Feb 2026 16:11:57 +0200 Subject: [PATCH 1/2] [FLPATH-3323] Add AWS self-hosted/on-prem support Co-Authored-By: Claude Opus 4.6 --- koku/koku/reportdb_accessor.py | 4 +- koku/koku/reportdb_accessor_postgres.py | 24 +- koku/koku/reportdb_accessor_trino.py | 4 +- koku/koku/test/test_reportdb_accessor.py | 8 +- koku/masu/database/aws_report_db_accessor.py | 45 +- .../1_resource_matching_by_cluster.sql | 178 ++++ .../2_summarize_data_by_cluster.sql | 861 ++++++++++++++++++ ...wscostlineitem_project_daily_summary_p.sql | 162 ++++ .../reporting_ocpaws_matched_tags.sql | 64 ++ .../reporting_ocpaws_compute_summary_p.sql | 55 ++ ...rting_ocpaws_cost_summary_by_account_p.sql | 46 + ...orting_ocpaws_cost_summary_by_region_p.sql | 50 + ...rting_ocpaws_cost_summary_by_service_p.sql | 50 + .../reporting_ocpaws_cost_summary_p.sql | 44 + .../reporting_ocpaws_database_summary_p.sql | 53 ++ .../reporting_ocpaws_network_summary_p.sql | 53 ++ .../reporting_ocpaws_storage_summary_p.sql | 54 ++ ...wscostlineitem_project_daily_summary_p.sql | 104 +++ ...ing_awscostentrylineitem_daily_summary.sql | 152 ++++ ...entrylineitem_summary_by_ec2_compute_p.sql | 177 ++++ ...porting_ocpinfrastructure_provider_map.sql | 64 ++ .../openshift/ocp_special_matched_tags.sql | 44 + .../aws/aws_report_parquet_processor.py | 35 + .../ocp/ocp_report_parquet_processor.py | 65 +- .../parquet/parquet_report_processor.py | 2 +- .../report_parquet_processor_base.py | 127 ++- .../database/test_aws_report_db_accessor.py | 37 + .../aws/test_aws_report_parquet_processor.py | 92 ++ .../ocp/test_ocp_report_parquet_processor.py | 14 +- .../parquet/test_parquet_report_processor.py | 4 +- .../test_report_parquet_processor_base.py | 23 + koku/masu/test/util/aws/test_common.py | 138 +++ koku/masu/util/aws/common.py | 113 ++- ...1_awslineitem_awslineitemdaily_and_more.py | 425 +++++++++ koku/reporting/provider/aws/models.py 
| 13 + .../aws/openshift/self_hosted_models.py | 173 ++++ .../provider/aws/self_hosted_models.py | 167 ++++ 37 files changed, 3626 insertions(+), 98 deletions(-) create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/3_reporting_ocpawscostlineitem_project_daily_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/reporting_ocpaws_matched_tags.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_compute_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_account_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_region_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_service_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_database_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_network_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_storage_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpawscostlineitem_project_daily_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_daily_summary.sql create mode 100755 koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_summary_by_ec2_compute_p.sql create 
mode 100755 koku/masu/database/self_hosted_sql/aws/reporting_ocpinfrastructure_provider_map.sql create mode 100644 koku/masu/database/self_hosted_sql/openshift/ocp_special_matched_tags.sql create mode 100644 koku/reporting/migrations/0351_awslineitem_awslineitemdaily_and_more.py create mode 100644 koku/reporting/provider/aws/openshift/self_hosted_models.py create mode 100644 koku/reporting/provider/aws/self_hosted_models.py diff --git a/koku/koku/reportdb_accessor.py b/koku/koku/reportdb_accessor.py index 93942e4868..f44d9ef254 100755 --- a/koku/koku/reportdb_accessor.py +++ b/koku/koku/reportdb_accessor.py @@ -43,14 +43,14 @@ def get_table_check_sql(self, table_name: str, schema_name: str): pass @abstractmethod - def get_delete_day_by_manifestid_sql( + def get_delete_by_manifestid_sql( self, schema_name: str, table_name: str, source: str, year: str, month: str, manifestid: str ): """Return the SQL to delete data where manifestid doesn't match""" pass @abstractmethod - def get_delete_day_by_reportnumhours_sql( + def get_delete_by_reportnumhours_sql( self, schema_name: str, table_name: str, diff --git a/koku/koku/reportdb_accessor_postgres.py b/koku/koku/reportdb_accessor_postgres.py index 26f2d8cc0e..c0b307424c 100755 --- a/koku/koku/reportdb_accessor_postgres.py +++ b/koku/koku/reportdb_accessor_postgres.py @@ -69,7 +69,7 @@ def get_table_check_sql(self, table_name: str, schema_name: str): f"WHERE table_name = '{table_name}' AND table_schema = '{schema_name}'" ) - def get_delete_day_by_manifestid_sql( + def get_delete_by_manifestid_sql( self, schema_name: str, table_name: str, source: str, year: str, month: str, manifestid: str ): """Return the SQL to delete data where manifestid doesn't match.""" @@ -81,7 +81,27 @@ def get_delete_day_by_manifestid_sql( AND manifestid != '{manifestid}' """ - def get_delete_day_by_reportnumhours_sql( + def get_delete_by_manifestid_and_date_sql( + self, + schema_name: str, + table_name: str, + source: str, + year: str, + month: str, 
+ manifestid: str, + processing_date: str, + ): + """Return the SQL to delete data where manifestid doesn't match, scoped to dates >= processing_date.""" + return f""" + DELETE FROM "{schema_name}"."{table_name}" + WHERE source = '{source}' + AND year = '{year}' + AND month = '{month}' + AND manifestid != '{manifestid}' + AND {DATE_COLUMN} >= DATE '{processing_date}' + """ + + def get_delete_by_reportnumhours_sql( self, schema_name: str, table_name: str, diff --git a/koku/koku/reportdb_accessor_trino.py b/koku/koku/reportdb_accessor_trino.py index b44b3ddfdc..9b8a4941b6 100755 --- a/koku/koku/reportdb_accessor_trino.py +++ b/koku/koku/reportdb_accessor_trino.py @@ -86,7 +86,7 @@ def get_partition_create_sql( # This method is not used for Trino, but must exist to satisfy the abstract base class return "" - def get_delete_day_by_manifestid_sql( + def get_delete_by_manifestid_sql( self, schema_name: str, table_name: str, source: str, year: str, month: str, manifestid: str ): """Trino delete by manifestid - not used, Trino uses S3 file deletion.""" @@ -95,7 +95,7 @@ def get_delete_day_by_manifestid_sql( # This method exists only to satisfy the abstract base class return "" - def get_delete_day_by_reportnumhours_sql( + def get_delete_by_reportnumhours_sql( self, schema_name: str, table_name: str, diff --git a/koku/koku/test/test_reportdb_accessor.py b/koku/koku/test/test_reportdb_accessor.py index 70c7a89315..f6e6adf5f2 100644 --- a/koku/koku/test/test_reportdb_accessor.py +++ b/koku/koku/test/test_reportdb_accessor.py @@ -50,17 +50,17 @@ def test_get_table_check_sql(self): self.assertIn(self.table_name, sql) self.assertIn(self.schema_name, sql) - def test_get_delete_day_by_manifestid_sql(self): + def test_get_delete_by_manifestid_sql(self): """Test delete by manifest ID SQL generation.""" - sql = self.accessor.get_delete_day_by_manifestid_sql( + sql = self.accessor.get_delete_by_manifestid_sql( self.schema_name, self.table_name, self.source_uuid, "2024", "01", "123" ) 
self.assertIn("DELETE FROM", sql) self.assertIn("manifestid != '123'", sql) - def test_get_delete_day_by_reportnumhours_sql(self): + def test_get_delete_by_reportnumhours_sql(self): """Test delete by reportnumhours SQL generation.""" - sql = self.accessor.get_delete_day_by_reportnumhours_sql( + sql = self.accessor.get_delete_by_reportnumhours_sql( self.schema_name, self.table_name, self.source_uuid, diff --git a/koku/masu/database/aws_report_db_accessor.py b/koku/masu/database/aws_report_db_accessor.py index e8f5a671a9..4c61d22e45 100644 --- a/koku/masu/database/aws_report_db_accessor.py +++ b/koku/masu/database/aws_report_db_accessor.py @@ -112,7 +112,7 @@ def populate_line_item_daily_summary_table_trino(self, start_date, end_date, sou """ sql = pkgutil.get_data( - "masu.database", f"{self.trino_sql_folder_name}/aws/reporting_awscostentrylineitem_daily_summary.sql" + "masu.database", f"{self.get_sql_folder_name()}/aws/reporting_awscostentrylineitem_daily_summary.sql" ) sql = sql.decode("utf-8") uuid_str = str(uuid.uuid4()).replace("-", "_") @@ -174,7 +174,7 @@ def populate_ocp_on_aws_ui_summary_tables_trino( for table_name in tables: sql = pkgutil.get_data( - "masu.database", f"{self.trino_sql_folder_name}/aws/openshift/ui_summary/{table_name}.sql" + "masu.database", f"{self.get_sql_folder_name()}/aws/openshift/ui_summary/{table_name}.sql" ) sql = sql.decode("utf-8") sql_params = { @@ -277,12 +277,7 @@ def populate_ocp_on_aws_cost_daily_summary_trino( bill_id, report_period_id, ) - managed_path = f"{self.trino_sql_folder_name}/aws/openshift/populate_daily_summary" - prepare_sql, prepare_params = sql_metadata.prepare_template( - f"{managed_path}/0_prepare_daily_summary_tables.sql" - ) - LOG.info(log_json(msg="Preparing tables for OCP on AWS flow", **prepare_params)) - self._execute_trino_multipart_sql_query(prepare_sql, bind_params=prepare_params) + managed_path = f"{self.get_sql_folder_name()}/aws/openshift/populate_daily_summary" 
self.delete_ocp_on_aws_hive_partition_by_day( sql_metadata.days_tup, sql_metadata.cloud_provider_uuid, @@ -444,7 +439,7 @@ def get_openshift_on_cloud_matched_tags_trino( ): """Return a list of matched tags.""" sql = pkgutil.get_data( - "masu.database", f"{self.trino_sql_folder_name}/aws/openshift/reporting_ocpaws_matched_tags.sql" + "masu.database", f"{self.get_sql_folder_name()}/aws/openshift/reporting_ocpaws_matched_tags.sql" ) sql = sql.decode("utf-8") @@ -511,7 +506,7 @@ def populate_ec2_compute_summary_table_trino(self, source_uuid, start_date, bill } LOG.info(log_json(msg=msg, context=context)) - sql = pkgutil.get_data("masu.database", f"{self.trino_sql_folder_name}/aws/{table_name}.sql") + sql = pkgutil.get_data("masu.database", f"{self.get_sql_folder_name()}/aws/{table_name}.sql") sql = sql.decode("utf-8") sql_params = { "schema": self.schema, @@ -523,3 +518,33 @@ def populate_ec2_compute_summary_table_trino(self, source_uuid, start_date, bill } self._execute_trino_raw_sql_query(sql, sql_params=sql_params, log_ref=f"{table_name}.sql") + + def delete_self_hosted_data_by_source(self, provider_uuid): + """Delete data from all self-hosted tables by source UUID (for on-prem). + + This deletes data from the line item tables when a source is deleted. 
+ + Args: + provider_uuid: The provider UUID to delete data for + """ + from reporting.provider.aws.self_hosted_models import get_self_hosted_models + + provider_uuid_str = str(provider_uuid) + total_deleted = 0 + + with schema_context(self.schema): + for model in get_self_hosted_models(): + deleted_count, _ = model.objects.filter(source=provider_uuid_str).delete() + + if deleted_count: + LOG.info( + log_json( + msg="deleted self-hosted data by source", + table=model._meta.db_table, + provider_uuid=provider_uuid_str, + deleted_count=deleted_count, + ) + ) + total_deleted += deleted_count + + return total_deleted diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql b/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql new file mode 100755 index 0000000000..7f60234097 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql @@ -0,0 +1,178 @@ +DELETE FROM {{schema | sqlsafe}}.managed_aws_openshift_daily_temp +WHERE source = {{cloud_provider_uuid}} +AND ocp_source = {{ocp_provider_uuid}} +AND year = {{year}} +AND month = {{month}} +RETURNING 1; + +INSERT INTO {{schema | sqlsafe}}.managed_aws_openshift_daily_temp ( + row_uuid, + resource_id, + product_code, + usage_start, + usage_account_id, + availability_zone, + product_family, + instance_type, + region, + unit, + tags, + aws_cost_category, + usage_amount, + data_transfer_direction, + currency_code, + unblended_cost, + blended_cost, + savingsplan_effective_cost, + calculated_amortized_cost, + resource_id_matched, + matched_tag, + source, + ocp_source, + year, + month, + day +) +WITH cte_aws_resource_names AS ( + SELECT DISTINCT lineitem_resourceid + FROM {{schema | sqlsafe}}.aws_line_items_daily + WHERE source = {{cloud_provider_uuid}} + AND year = {{year}} + AND month = {{month}} + AND lineitem_usagestartdate >= {{start_date}} + 
AND lineitem_usagestartdate < {{end_date}} + INTERVAL '1 day' +), +cte_array_agg_nodes AS ( + SELECT DISTINCT resource_id + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily + WHERE source = {{ocp_provider_uuid}} + AND resource_id != '' + AND year = {{year}} + AND month = {{month}} + AND interval_start >= {{start_date}} + AND interval_start < {{end_date}} + INTERVAL '1 day' +), +cte_array_agg_volumes AS ( + SELECT DISTINCT persistentvolume, csi_volume_handle + FROM {{schema | sqlsafe}}.openshift_storage_usage_line_items_daily + WHERE source = {{ocp_provider_uuid}} + AND persistentvolume != '' + AND year = {{year}} + AND month = {{month}} + AND interval_start >= {{start_date}} + AND interval_start < {{end_date}} + INTERVAL '1 day' +), +cte_matchable_resource_names AS ( + SELECT resource_names.lineitem_resourceid + FROM cte_aws_resource_names AS resource_names + JOIN cte_array_agg_nodes AS nodes + ON right(resource_names.lineitem_resourceid, length(nodes.resource_id)) = nodes.resource_id + + UNION + + SELECT resource_names.lineitem_resourceid + FROM cte_aws_resource_names AS resource_names + JOIN cte_array_agg_volumes AS volumes + ON ( + right(resource_names.lineitem_resourceid, length(volumes.persistentvolume)) = volumes.persistentvolume + OR (volumes.csi_volume_handle != '' AND right(resource_names.lineitem_resourceid, length(volumes.csi_volume_handle)) = volumes.csi_volume_handle) + ) +), +cte_agg_tags AS ( + SELECT array_agg(cte_tag_matches.matched_tag) as matched_tags from ( + SELECT * FROM unnest(CAST(ARRAY{{matched_tag_array | sqlsafe}} AS VARCHAR[])) as t(matched_tag) + ) as cte_tag_matches +), +cte_enabled_tag_keys AS ( + SELECT + CASE WHEN array_agg(key) IS NOT NULL + THEN ARRAY['openshift_cluster', 'openshift_node', 'openshift_project'] || array_agg(key) + ELSE ARRAY['openshift_cluster', 'openshift_node', 'openshift_project'] + END as enabled_keys + FROM {{schema | sqlsafe}}.reporting_enabledtagkeys + WHERE enabled = TRUE + AND provider_type = 
'AWS' +) +SELECT + aws.row_uuid, + nullif(aws.lineitem_resourceid, '') as resource_id, + CASE + WHEN aws.bill_billingentity='AWS Marketplace' THEN coalesce(nullif(aws.product_productname, ''), nullif(aws.lineitem_productcode, '')) + ELSE nullif(aws.lineitem_productcode, '') + END as product_code, + aws.lineitem_usagestartdate as usage_start, + aws.lineitem_usageaccountid as usage_account_id, + nullif(aws.lineitem_availabilityzone, '') as availability_zone, + nullif(aws.product_productfamily, '') as product_family, + nullif(aws.product_instancetype, '') as instance_type, + nullif(aws.product_region, '') as region, + nullif(aws.pricing_unit, '') as unit, + (SELECT json_object_agg(key, value) FROM jsonb_each_text(aws.resourcetags::jsonb) WHERE key = ANY(etk.enabled_keys))::text as tags, + aws.costcategory as aws_cost_category, + aws.lineitem_usageamount as usage_amount, + CASE + -- Is this a network record? + WHEN aws.lineitem_productcode = 'AmazonEC2' AND aws.product_productfamily = 'Data Transfer' THEN + -- Yes, it's a network record. What's the direction? 
+ CASE + WHEN strpos(lower(aws.lineitem_usagetype), 'in-bytes') > 0 THEN 'IN' + WHEN strpos(lower(aws.lineitem_usagetype), 'out-bytes') > 0 THEN 'OUT' + WHEN (strpos(lower(aws.lineitem_usagetype), 'regional-bytes') > 0 AND strpos(lower(lineitem_operation), '-in') > 0) THEN 'IN' + WHEN (strpos(lower(aws.lineitem_usagetype), 'regional-bytes') > 0 AND strpos(lower(lineitem_operation), '-out') > 0) THEN 'OUT' + ELSE NULL + END + END AS data_transfer_direction, + nullif(aws.lineitem_currencycode, '') as currency_code, + -- SavingsPlanCoveredUsage needs to be negated to show accurate cost COST-5098 + CASE + WHEN aws.lineitem_lineitemtype='SavingsPlanCoveredUsage' + THEN 0.0 + ELSE aws.lineitem_unblendedcost + END as unblended_cost, + CASE + WHEN aws.lineitem_lineitemtype='SavingsPlanCoveredUsage' + THEN 0.0 + ELSE aws.lineitem_blendedcost + END as blended_cost, + aws.savingsplan_savingsplaneffectivecost as savingsplan_effective_cost, + CASE + WHEN aws.lineitem_lineitemtype='Tax' + OR aws.lineitem_lineitemtype='Usage' + THEN aws.lineitem_unblendedcost + ELSE aws.savingsplan_savingsplaneffectivecost + END as calculated_amortized_cost, + CASE WHEN resource_names.lineitem_resourceid IS NOT NULL + THEN TRUE + ELSE FALSE + END as resource_id_matched, + array_to_string( + ARRAY( + SELECT tag + FROM unnest(tag_matches.matched_tags) AS tag + WHERE strpos(resourcetags, tag) != 0 + ), + ',' + ) as matched_tag, + aws.source as source, + {{ocp_provider_uuid}} as ocp_source, + aws.year, + aws.month, + EXTRACT(DAY FROM aws.lineitem_usagestartdate)::text as day +FROM {{schema | sqlsafe}}.aws_line_items_daily AS aws +LEFT JOIN cte_matchable_resource_names AS resource_names + ON resource_names.lineitem_resourceid = aws.lineitem_resourceid +LEFT JOIN cte_agg_tags AS tag_matches + ON EXISTS ( + SELECT 1 + FROM unnest(tag_matches.matched_tags) AS matched_tag + WHERE strpos(resourcetags, matched_tag) != 0 + ) + AND resource_names.lineitem_resourceid IS NULL +CROSS JOIN cte_enabled_tag_keys as 
etk +WHERE aws.source = {{cloud_provider_uuid}} + AND aws.year = {{year}} + AND aws.month= {{month}} + AND aws.lineitem_usagestartdate >= {{start_date}} + AND aws.lineitem_usagestartdate < {{end_date}} + INTERVAL '1 day' + AND (resource_names.lineitem_resourceid IS NOT NULL OR tag_matches.matched_tags IS NOT NULL) +RETURNING 1 diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql b/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql new file mode 100755 index 0000000000..18cc64d1a1 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql @@ -0,0 +1,861 @@ +DELETE FROM {{schema | sqlsafe}}.managed_aws_openshift_disk_capacities_temp +WHERE ocp_source = {{ocp_provider_uuid}} +AND year = {{year}} +AND month = {{month}} +RETURNING 1; + + +-- Developer notes +-- We can't use the aws_openshift_daily table to calculate +-- the capacity because it has already aggregated cost per +-- each hour. 
+INSERT INTO {{schema | sqlsafe}}.managed_aws_openshift_disk_capacities_temp ( + resource_id, + capacity, + usage_start, + ocp_source, + year, + month +) +WITH cte_hours as ( + SELECT EXTRACT(DAY FROM (DATE_TRUNC('month', {{start_date}}) + INTERVAL '1 month - 1 day')) * 24 as in_month +), +cte_ocp_filtered_resources as ( + select + distinct aws.resource_id as resource_id, + {{ocp_provider_uuid}} as ocp_source, + DATE(aws.usage_start) as usage_start, + aws.year as year, + aws.month as month + FROM {{schema | sqlsafe}}.managed_aws_openshift_daily_temp as aws + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + ON aws.usage_start = ocp.usage_start + AND strpos(aws.resource_id, ocp.csi_volume_handle) != 0 + AND ocp.csi_volume_handle is not null + AND ocp.csi_volume_handle != '' + WHERE + ocp.source_uuid = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} + AND aws.ocp_source = {{ocp_provider_uuid}} + AND aws.source = {{cloud_provider_uuid}} + AND aws.year = {{year}} + AND aws.month = {{month}} +), +calculated_capacity AS ( + SELECT + aws.lineitem_resourceid as resource_id, + ROUND(MAX(aws.lineitem_unblendedcost) / (MAX(aws.lineitem_unblendedrate) / MAX(hours.in_month))) AS capacity, + ocpaws.usage_start, + {{ocp_provider_uuid}} as ocp_source, + {{year}} as year, + {{month}} as month + FROM {{schema | sqlsafe}}.aws_line_items as aws + INNER JOIN cte_ocp_filtered_resources as ocpaws + ON aws.lineitem_resourceid = ocpaws.resource_id + AND DATE(aws.lineitem_usagestartdate) = ocpaws.usage_start + CROSS JOIN cte_hours as hours + WHERE aws.year = {{year}} + AND aws.month = {{month}} + AND aws.source = {{cloud_provider_uuid}} + GROUP BY aws.lineitem_resourceid, ocpaws.usage_start +) +SELECT * +FROM calculated_capacity +WHERE capacity > 0 +RETURNING 1; + +DELETE FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary_temp +WHERE ocp_source = {{ocp_provider_uuid}} +AND source = 
{{cloud_provider_uuid}} +AND year = {{year}} +AND month = {{month}} +RETURNING 1; + +-- Storage disk resource id matching +-- Algorithm: +-- (PV Capacity) / Disk Capacity * Cost of Disk +-- PV without PVCs are unattributed storage +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + usage_account_id, + availability_zone, + region, + unit, + usage_amount, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + pod_labels, + volume_labels, + tags, + aws_cost_category, + cost_category_id, + resource_id_matched, + source, + ocp_source, + year, + month +) +SELECT uuid_generate_v4()::text as row_uuid, -- need a new uuid or it will deduplicate + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as cluster_alias, + 'Storage' as data_source, + CASE + WHEN max(ocp.persistentvolumeclaim) = '' + THEN 'Storage unattributed' + ELSE max(ocp.namespace) + END as namespace, + max(ocp.node) as node, + max(ocp.persistentvolumeclaim) as persistentvolumeclaim, + max(ocp.persistentvolume) as persistentvolume, + max(ocp.storageclass) as storageclass, + max(aws.resource_id) as resource_id, + max(aws.usage_start) as usage_start, + max(aws.usage_start) as usage_end, + max(aws.product_code) as product_code, + max(aws.product_family) as product_family, + max(aws.instance_type) as instance_type, + max(aws.usage_account_id) as usage_account_id, + max(aws.availability_zone) as availability_zone, + max(aws.region) as region, + max(aws.unit) as unit, + CASE + WHEN max(ocp.persistentvolumeclaim) = '' + THEN cast(NULL as double precision) + ELSE max(aws.usage_amount) + 
END as usage_amount, + max(aws.currency_code) as currency_code, + max(ocp.persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.unblended_cost) as unblended_cost, + (max(persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.unblended_cost)) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(ocp.persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.blended_cost) as blended_cost, + (max(persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.blended_cost)) * cast({{markup}} as decimal(24,9)) as markup_cost_blended, + max(ocp.persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.savingsplan_effective_cost) as savingsplan_effective_cost, + (max(persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.savingsplan_effective_cost)) * cast({{markup}} as decimal(24,9)) as markup_cost_savingsplan, + max(ocp.persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.calculated_amortized_cost) as calculated_amortized_cost, + (max(persistentvolumeclaim_capacity_gigabyte) / max(aws_disk.capacity) * max(aws.calculated_amortized_cost)) * cast({{markup}} as decimal(24,9)) as markup_cost_amortized, + CASE + WHEN max(ocp.persistentvolumeclaim) = '' + THEN NULL::jsonb + ELSE ocp.pod_labels::jsonb + END as pod_labels, + CASE + WHEN max(ocp.persistentvolumeclaim) = '' + THEN NULL::jsonb + ELSE ocp.volume_labels::jsonb + END as volume_labels, + max(aws.tags) as tags, + CASE + WHEN max(ocp.persistentvolumeclaim) = '' + THEN NULL + ELSE max(aws.aws_cost_category) + END as aws_cost_category, + CASE + WHEN max(ocp.persistentvolumeclaim) = '' + THEN NULL + ELSE max(ocp.cost_category_id) + END as cost_category_id, + bool_or(aws.resource_id_matched) as resource_id_matched, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(aws.year) as year, + max(aws.month) as month +FROM {{schema | 
sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp +JOIN {{schema | sqlsafe}}.managed_aws_openshift_daily_temp as aws + ON aws.usage_start = ocp.usage_start + AND strpos(aws.resource_id, ocp.csi_volume_handle) != 0 + AND ocp.csi_volume_handle is not null + AND ocp.csi_volume_handle != '' +JOIN {{schema | sqlsafe}}.managed_aws_openshift_disk_capacities_temp AS aws_disk + ON aws_disk.usage_start = aws.usage_start + AND aws_disk.resource_id = aws.resource_id +WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.day IN {{days | inclause}} + AND ocp.persistentvolume is not null + AND aws.source = {{cloud_provider_uuid}} + AND aws.ocp_source = {{ocp_provider_uuid}} + AND aws.year = {{year}} + AND aws.month = {{month}} + -- Filter out Node Network Costs since they cannot be attributed to a namespace and are accounted for later + AND aws.data_transfer_direction IS NULL + AND ocp.namespace != 'Storage unattributed' + AND aws.resource_id_matched = True + AND aws_disk.year = {{year}} + AND aws_disk.month = {{month}} + AND aws_disk.ocp_source = {{ocp_provider_uuid}} +GROUP BY aws.row_uuid, ocp.namespace, ocp.pod_labels, ocp.volume_labels +RETURNING 1; + +-- Unattributed Storage Cost: +-- ((Disk Capacity - Sum(PV capacity)) / Disk Capacity) * Cost of Disk +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + usage_account_id, + availability_zone, + region, + unit, + usage_amount, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + 
resource_id_matched, + source, + ocp_source, + year, + month +) +WITH cte_total_pv_capacity as ( + SELECT + aws_resource_id, + SUM(combined_requests.capacity) as total_pv_capacity, + count(distinct cluster_id) as cluster_count + FROM ( + SELECT + ocp.persistentvolume, + max(ocp.persistentvolumeclaim_capacity_gigabyte) as capacity, + aws.resource_id as aws_resource_id, + ocp.cluster_id + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_aws_openshift_daily_temp as aws + ON (aws.usage_start = ocp.usage_start) + AND strpos(aws.resource_id, ocp.csi_volume_handle) > 0 + AND ocp.csi_volume_handle is not null + AND ocp.csi_volume_handle != '' + WHERE ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + AND aws.ocp_source = {{ocp_provider_uuid}} + AND aws.year = {{year}} + AND aws.month = {{month}} + AND aws.resource_id_matched = True + GROUP BY ocp.persistentvolume, aws.resource_id, ocp.cluster_id + ) as combined_requests group by aws_resource_id +) +SELECT uuid_generate_v4()::text as row_uuid, -- need a new uuid or it will deduplicate + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as cluster_alias, + 'Storage' as data_source, + 'Storage unattributed' as namespace, + max(ocp.persistentvolumeclaim) as persistentvolumeclaim, + max(ocp.persistentvolume) as persistentvolume, + max(ocp.storageclass) as storageclass, + max(aws.resource_id) as resource_id, + max(aws.usage_start) as usage_start, + max(aws.usage_start) as usage_end, + max(aws.product_code) as product_code, + max(aws.product_family) as product_family, + max(aws.instance_type) as instance_type, + max(aws.usage_account_id) as usage_account_id, + max(aws.availability_zone) as availability_zone, + max(aws.region) as region, + max(aws.unit) as unit, + cast(NULL as double precision) as usage_amount, + max(aws.currency_code) as 
currency_code, + (max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.unblended_cost) / max(pv_cap.cluster_count) as unblended_cost, + ((max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.unblended_cost)) * cast({{markup}} as decimal(24,9)) / max(pv_cap.cluster_count) as markup_cost, + (max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.blended_cost) / max(pv_cap.cluster_count) as blended_cost, + ((max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.blended_cost)) * cast({{markup}} as decimal(24,9)) / max(pv_cap.cluster_count) as markup_cost_blended, + (max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.savingsplan_effective_cost) / max(pv_cap.cluster_count) as savingsplan_effective_cost, + ((max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.savingsplan_effective_cost)) * cast({{markup}} as decimal(24,9)) / max(pv_cap.cluster_count) as markup_cost_savingsplan, + (max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.calculated_amortized_cost) / max(pv_cap.cluster_count) as calculated_amortized_cost, + ((max(aws_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(aws_disk.capacity) * max(aws.calculated_amortized_cost)) * cast({{markup}} as decimal(24,9)) / max(pv_cap.cluster_count) as markup_cost_amortized, + bool_or(aws.resource_id_matched) as resource_id_matched, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(aws.year) as year, + max(aws.month) as month +FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp +JOIN {{schema | sqlsafe}}.managed_aws_openshift_daily_temp as aws + ON aws.usage_start = ocp.usage_start + AND strpos(aws.resource_id, ocp.csi_volume_handle) != 0 + AND ocp.csi_volume_handle is not null + AND 
ocp.csi_volume_handle != '' + AND ocp.source = aws.ocp_source +JOIN {{schema | sqlsafe}}.managed_aws_openshift_disk_capacities_temp AS aws_disk + ON aws_disk.usage_start = aws.usage_start + AND aws_disk.resource_id = aws.resource_id +LEFT JOIN cte_total_pv_capacity as pv_cap + ON pv_cap.aws_resource_id = aws.resource_id +WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.day IN {{days | inclause}} + AND ocp.persistentvolume is not null + AND aws.source = {{cloud_provider_uuid}} + AND aws.year = {{year}} + AND aws.month = {{month}} + AND aws.data_transfer_direction IS NULL + AND aws.resource_id_matched = True + AND ocp.namespace != 'Storage unattributed' + AND aws_disk.capacity != pv_cap.total_pv_capacity -- prevent inserting zero cost rows + AND aws_disk.year = {{year}} + AND aws_disk.month = {{month}} + AND aws_disk.ocp_source = {{ocp_provider_uuid}} +GROUP BY aws.row_uuid, aws.resource_id +RETURNING 1; + +-- Direct resource_id matching +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + usage_account_id, + availability_zone, + region, + unit, + usage_amount, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + pod_effective_usage_cpu_core_hours, + pod_effective_usage_memory_gigabyte_hours, + node_capacity_cpu_core_hours, + node_capacity_memory_gigabyte_hours, + pod_labels, + tags, + aws_cost_category, + cost_category_id, + resource_id_matched, + source, + ocp_source, + year, + month +) +SELECT aws.row_uuid, + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as 
cluster_alias, + 'Pod' as data_source, + ocp.namespace, + max(ocp.node) as node, + max(aws.resource_id) as resource_id, + max(aws.usage_start) as usage_start, + max(aws.usage_start) as usage_end, + max(aws.product_code) as product_code, + max(aws.product_family) as product_family, + max(aws.instance_type) as instance_type, + max(aws.usage_account_id) as usage_account_id, + max(aws.availability_zone) as availability_zone, + max(aws.region) as region, + max(aws.unit) as unit, + max(aws.usage_amount) as usage_amount, + max(aws.currency_code) as currency_code, + max(aws.unblended_cost) as unblended_cost, + max(aws.unblended_cost) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(aws.blended_cost) as blended_cost, + max(aws.blended_cost) * cast({{markup}} as decimal(33,15)) as markup_cost_blended, + max(aws.savingsplan_effective_cost) as savingsplan_effective_cost, + max(aws.savingsplan_effective_cost) * cast({{markup}} as decimal(33,15)) as markup_cost_savingsplan, + max(aws.calculated_amortized_cost) as calculated_amortized_cost, + max(aws.calculated_amortized_cost) * cast({{markup}} as decimal(33,9)) as markup_cost_amortized, + sum(ocp.pod_effective_usage_cpu_core_hours) as pod_effective_usage_cpu_core_hours, + sum(ocp.pod_effective_usage_memory_gigabyte_hours) as pod_effective_usage_memory_gigabyte_hours, + max(ocp.node_capacity_cpu_core_hours) as node_capacity_cpu_core_hours, + max(ocp.node_capacity_memory_gigabyte_hours) as node_capacity_memory_gigabyte_hours, + ocp.pod_labels::jsonb as pod_labels, + max(aws.tags) as tags, + max(aws.aws_cost_category) as aws_cost_category, + max(ocp.cost_category_id) as cost_category_id, + bool_or(aws.resource_id_matched) as resource_id_matched, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(aws.year) as year, + max(aws.month) as month + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_aws_openshift_daily_temp as aws + 
ON aws.usage_start = ocp.usage_start + AND strpos(aws.resource_id, ocp.resource_id) != 0 + LEFT JOIN {{schema | sqlsafe}}.managed_aws_openshift_disk_capacities_temp AS aws_disk + ON aws_disk.usage_start = aws.usage_start + AND aws_disk.resource_id = aws.resource_id + AND aws_disk.year = aws.year + AND aws_disk.month = aws.month + AND aws_disk.ocp_source = aws.ocp_source + WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.day IN {{days | inclause}} + AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '') + AND aws.ocp_source = {{ocp_provider_uuid}} + AND aws.source = {{cloud_provider_uuid}} + AND aws.year = {{year}} + AND aws.month = {{month}} + AND aws.resource_id_matched = True + -- Filter out Node Network Costs (rows that carry a data_transfer_direction) since they cannot be attributed to a namespace and are accounted for later + AND aws.data_transfer_direction IS NULL + AND aws_disk.resource_id is NULL -- anti-join on the LEFT JOIN above: exclude any resource used in disk capacity calculations + GROUP BY aws.row_uuid, ocp.namespace, ocp.pod_labels +RETURNING 1; + +-- Tag matching: AWS rows not matched by resource_id are joined to OCP rows via openshift_* tags or matched_tag labels +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + usage_account_id, + availability_zone, + region, + unit, + usage_amount, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + pod_labels, + volume_labels, + tags, + aws_cost_category, + cost_category_id, + resource_id_matched, + matched_tag, + source, + ocp_source, + year, + month +) +SELECT aws.row_uuid, + max(ocp.cluster_id) as cluster_id, +
max(ocp.cluster_alias) as cluster_alias, + ocp.data_source, + ocp.namespace, + max(ocp.node) as node, + max(nullif(ocp.persistentvolumeclaim, '')) as persistentvolumeclaim, + max(nullif(ocp.persistentvolume, '')) as persistentvolume, + max(nullif(ocp.storageclass, '')) as storageclass, + max(aws.resource_id) as resource_id, + max(aws.usage_start) as usage_start, + max(aws.usage_start) as usage_end, + max(aws.product_code) as product_code, + max(aws.product_family) as product_family, + max(aws.instance_type) as instance_type, + max(aws.usage_account_id) as usage_account_id, + max(aws.availability_zone) as availability_zone, + max(aws.region) as region, + max(aws.unit) as unit, + max(aws.usage_amount) as usage_amount, + max(aws.currency_code) as currency_code, + max(aws.unblended_cost) as unblended_cost, + max(aws.unblended_cost) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(aws.blended_cost) as blended_cost, + max(aws.blended_cost) * cast({{markup}} as decimal(33,15)) as markup_cost_blended, + max(aws.savingsplan_effective_cost) as savingsplan_effective_cost, + max(aws.savingsplan_effective_cost) * cast({{markup}} as decimal(33,15)) as markup_cost_savingsplan, + max(aws.calculated_amortized_cost) as calculated_amortized_cost, + max(aws.calculated_amortized_cost) * cast({{markup}} as decimal(33,9)) as markup_cost_amortized, + max(ocp.pod_labels)::jsonb as pod_labels, + max(ocp.volume_labels)::jsonb as volume_labels, + max(aws.tags) as tags, + max(aws.aws_cost_category) as aws_cost_category, + max(ocp.cost_category_id) as cost_category_id, + FALSE as resource_id_matched, + max(aws.matched_tag) as matched_tag, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(aws.year) as year, + max(aws.month) as month + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_aws_openshift_daily_temp as aws + ON aws.usage_start = ocp.usage_start + AND ( + 
aws.tags::jsonb->>'openshift_project' = ocp.namespace + OR aws.tags::jsonb->>'openshift_node' = ocp.node + OR aws.tags::jsonb->>'openshift_cluster' = ocp.cluster_alias + OR aws.tags::jsonb->>'openshift_cluster' = ocp.cluster_id + OR (aws.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(aws.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.pod_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + OR (aws.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(aws.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.volume_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + ) + AND ocp.namespace != 'Worker unallocated' -- these four namespaces are excluded from tag matching; qualified with ocp. so the column reference cannot become ambiguous + AND ocp.namespace != 'Platform unallocated' + AND ocp.namespace != 'Storage unattributed' + AND ocp.namespace != 'Network unattributed' + WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.day IN {{days | inclause}} + AND aws.ocp_source = {{ocp_provider_uuid}} + AND aws.source = {{cloud_provider_uuid}} + AND aws.year = {{year}} + AND aws.month = {{month}} + AND aws.resource_id_matched = FALSE + AND aws.matched_tag is not null and aws.matched_tag != '' + GROUP BY aws.row_uuid, ocp.namespace, ocp.data_source +RETURNING 1; + +{%- if distribution == 'cpu' -%} +{%- set pod_column = 'pod_effective_usage_cpu_core_hours' -%} +{%- set node_column = 'node_capacity_cpu_core_hours' -%} +{%- else -%} +{%- set pod_column = 'pod_effective_usage_memory_gigabyte_hours' -%} +{%- set node_column = 'node_capacity_memory_gigabyte_hours' -%} +{%- endif -%} +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + usage_account_id, + account_alias_id, + availability_zone, + region,
+ unit, + usage_amount, + data_transfer_direction, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + tags, + aws_cost_category, + cost_category_id, + resource_id_matched, + matched_tag, + source, + ocp_source, + year, + month, + day +) +WITH cte_cluster_counts AS ( + -- Count distinct clusters matching each AWS resource for tag-matched resources + -- This must query source OCP data to get accurate count across all OCP sources, + -- not just the temp table which only contains data for the current OCP source. + -- We use the temp table to get tags/matched_tag, but join to ALL OCP sources to count matches. + SELECT aws_temp.row_uuid, + count(DISTINCT ocp.source) as cluster_count + FROM {{schema | sqlsafe}}.managed_aws_openshift_daily_temp AS aws_temp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + ON aws_temp.usage_start = ocp.usage_start + AND ( + aws_temp.tags::jsonb->>'openshift_project' = ocp.namespace + OR aws_temp.tags::jsonb->>'openshift_node' = ocp.node + OR aws_temp.tags::jsonb->>'openshift_cluster' = ocp.cluster_alias + OR aws_temp.tags::jsonb->>'openshift_cluster' = ocp.cluster_id + OR (aws_temp.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(aws_temp.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.pod_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + OR (aws_temp.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(aws_temp.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.volume_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + ) + AND ocp.namespace != 'Worker unallocated' + AND ocp.namespace != 'Platform unallocated' + AND ocp.namespace != 'Storage unattributed' + AND ocp.namespace != 'Network unattributed' + WHERE aws_temp.source = {{cloud_provider_uuid}} + AND aws_temp.year = {{year}} + AND aws_temp.month = {{month}} + AND 
aws_temp.resource_id_matched = FALSE + AND aws_temp.matched_tag IS NOT NULL + AND aws_temp.matched_tag != '' + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} + AND ocp.day IN {{days | inclause}} + -- Don't filter by ocp.source here - we want to count ALL matching OCP sources + GROUP BY aws_temp.row_uuid +), +cte_rankings AS ( + SELECT pds.row_uuid, + count(*) as aws_uuid_count, + COALESCE(ccc.cluster_count, 1) as cluster_count + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary_temp AS pds + LEFT JOIN cte_cluster_counts AS ccc + ON pds.row_uuid = ccc.row_uuid + WHERE pds.ocp_source = {{ocp_provider_uuid}} AND year = {{year}} AND month = {{month}} + GROUP BY pds.row_uuid, ccc.cluster_count +) +SELECT pds.row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + usage_account_id, + aa.id as account_alias_id, + availability_zone, + region, + unit, + -- For tag-matched resources, split cost across clusters; for resource_id-matched, only split within cluster + CASE WHEN pds.resource_id_matched = FALSE + THEN usage_amount / (r.aws_uuid_count * r.cluster_count) + ELSE usage_amount / r.aws_uuid_count + END as usage_amount, + NULL AS data_transfer_direction, + currency_code, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * unblended_cost + WHEN resource_id_matched = FALSE + THEN unblended_cost / (r.aws_uuid_count * r.cluster_count) + ELSE unblended_cost / r.aws_uuid_count + END as unblended_cost, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * unblended_cost * cast({{markup}} as decimal(24,9)) + WHEN resource_id_matched = FALSE + THEN unblended_cost / (r.aws_uuid_count * 
r.cluster_count) * cast({{markup}} as decimal(24,9)) + ELSE unblended_cost / r.aws_uuid_count * cast({{markup}} as decimal(24,9)) + END as markup_cost, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * blended_cost + WHEN resource_id_matched = FALSE + THEN blended_cost / (r.aws_uuid_count * r.cluster_count) + ELSE blended_cost / r.aws_uuid_count + END as blended_cost, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * blended_cost * cast({{markup}} as decimal(24,9)) + WHEN resource_id_matched = FALSE + THEN blended_cost / (r.aws_uuid_count * r.cluster_count) * cast({{markup}} as decimal(24,9)) + ELSE blended_cost / r.aws_uuid_count * cast({{markup}} as decimal(24,9)) + END as markup_cost_blended, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * savingsplan_effective_cost + WHEN resource_id_matched = FALSE + THEN savingsplan_effective_cost / (r.aws_uuid_count * r.cluster_count) + ELSE savingsplan_effective_cost / r.aws_uuid_count + END as savingsplan_effective_cost, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * savingsplan_effective_cost * cast({{markup}} as decimal(24,9)) + WHEN resource_id_matched = FALSE + THEN savingsplan_effective_cost / (r.aws_uuid_count * r.cluster_count) * cast({{markup}} as decimal(24,9)) + ELSE savingsplan_effective_cost / r.aws_uuid_count * cast({{markup}} as decimal(24,9)) + END as markup_cost_savingsplan, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * calculated_amortized_cost + WHEN resource_id_matched = FALSE + THEN calculated_amortized_cost / (r.aws_uuid_count * r.cluster_count) + ELSE 
calculated_amortized_cost / r.aws_uuid_count + END as calculated_amortized_cost, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * calculated_amortized_cost * cast({{markup}} as decimal(33,9)) + WHEN resource_id_matched = FALSE + THEN calculated_amortized_cost / (r.aws_uuid_count * r.cluster_count) * cast({{markup}} as decimal(33,9)) + ELSE calculated_amortized_cost / r.aws_uuid_count * cast({{markup}} as decimal(33,9)) + END as markup_cost_amortized, + CASE WHEN pds.pod_labels IS NOT NULL + THEN ( + SELECT json_object_agg(key, value)::text + FROM ( + SELECT * FROM jsonb_each_text(pds.pod_labels::jsonb) + UNION ALL + SELECT * FROM jsonb_each_text(pds.tags::jsonb) + ) combined(key, value) + ) + ELSE ( + SELECT json_object_agg(key, value)::text + FROM ( + SELECT * FROM jsonb_each_text(pds.volume_labels::jsonb) + UNION ALL + SELECT * FROM jsonb_each_text(pds.tags::jsonb) + ) combined(key, value) + ) + END as tags, + aws_cost_category, + cost_category_id, + pds.resource_id_matched, + pds.matched_tag, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + pds.year as year, + pds.month as month, + EXTRACT(DAY FROM usage_start)::text as day +FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary_temp AS pds +JOIN cte_rankings as r + ON pds.row_uuid = r.row_uuid +LEFT JOIN {{schema | sqlsafe}}.reporting_awsaccountalias AS aa + ON pds.usage_account_id = aa.account_id +WHERE pds.ocp_source = {{ocp_provider_uuid}} AND year = {{year}} AND month = {{month}} +RETURNING 1; + +-- Put Node Network Costs into the Network unattributed namespace +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + 
product_family, + instance_type, + usage_account_id, + account_alias_id, + availability_zone, + region, + unit, + usage_amount, + data_transfer_direction, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + tags, + source, + ocp_source, + year, + month, + day +) +SELECT + aws.row_uuid AS row_uuid, + max(cluster_id), + max(cluster_alias), + max(data_source), + 'Network unattributed' AS namespace, + ocp.node AS node, + max(persistentvolumeclaim), + max(persistentvolume), + max(storageclass), + max(aws.resource_id), + max(aws.usage_start), + max(usage_end), + max(product_code), + max(product_family), + max(instance_type), + max(usage_account_id), + max(aa.id) AS account_alias_id, + max(availability_zone), + max(region), + max(unit), + max(usage_amount), + data_transfer_direction, + max(currency_code), + max(unblended_cost), + max(unblended_cost) * cast({{markup}} AS decimal(24,9)), + max(blended_cost), + max(blended_cost) * cast({{markup}} AS decimal(24,9)), + max(savingsplan_effective_cost), + max(savingsplan_effective_cost) * cast({{markup}} AS decimal(24,9)), + max(calculated_amortized_cost), + max(calculated_amortized_cost) * cast({{markup}} AS decimal(33,9)), + max(aws.tags) as tags, + max({{cloud_provider_uuid}}) AS source, + max({{ocp_provider_uuid}}) AS ocp_source, + max(aws.year) as year, + max(aws.month) as month, + max(cast(EXTRACT(DAY FROM aws.usage_start) AS varchar)) AS day +FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging AS ocp +JOIN {{schema | sqlsafe}}.managed_aws_openshift_daily_temp AS aws + ON aws.usage_start = ocp.usage_start + AND strpos(aws.resource_id, ocp.resource_id) != 0 +LEFT JOIN {{schema | sqlsafe}}.reporting_awsaccountalias AS aa + ON aws.usage_account_id = aa.account_id +WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, 
'0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.day IN {{days | inclause}} + AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '') + AND aws.ocp_source = {{ocp_provider_uuid}} + AND aws.resource_id_matched = True + AND aws.year = {{year}} + AND aws.month = {{month}} + AND aws.data_transfer_direction IS NOT NULL + AND aws.data_transfer_direction != '' + AND ocp.data_source = 'Pod' +GROUP BY + aws.row_uuid, + ocp.node, + aws.data_transfer_direction +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/3_reporting_ocpawscostlineitem_project_daily_summary_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/3_reporting_ocpawscostlineitem_project_daily_summary_p.sql new file mode 100755 index 0000000000..719cf99355 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/populate_daily_summary/3_reporting_ocpawscostlineitem_project_daily_summary_p.sql @@ -0,0 +1,162 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary_p ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + cost_entry_bill_id, + usage_account_id, + account_alias_id, + availability_zone, + region, + unit, + usage_amount, + infrastructure_data_in_gigabytes, + infrastructure_data_out_gigabytes, + data_transfer_direction, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + pod_labels, + tags, + aws_cost_category, + cost_category_id, + source_uuid +) +with cte_pg_enabled_keys as ( + select array['vm_kubevirt_io_name'] || array_agg(key order by key) as keys + from {{schema | sqlsafe}}.reporting_enabledtagkeys + 
where enabled = true + and provider_type IN ('AWS', 'OCP') +), +filtered_data as ( + SELECT cluster_id, + (SELECT json_object_agg(key, value) FROM jsonb_each_text(tags::jsonb) WHERE key = ANY(pek.keys))::jsonb AS enabled_tags, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + date(usage_start) as usage_start, + product_code, + product_family, + instance_type, + usage_account_id, + account_alias_id, + availability_zone, + region, + unit, + usage_amount, + CASE + WHEN upper(data_transfer_direction) = 'IN' THEN usage_amount + ELSE 0 + END AS infrastructure_data_in_gigabytes, + CASE + WHEN upper(data_transfer_direction) = 'OUT' THEN usage_amount + ELSE 0 + END AS infrastructure_data_out_gigabytes, + data_transfer_direction, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + aws_cost_category::jsonb as aws_cost_category, + cost_category_id +FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary +CROSS JOIN cte_pg_enabled_keys AS pek +WHERE source = {{cloud_provider_uuid}} + AND ocp_source = {{ocp_provider_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day IN {{days | inclause}} +) +SELECT + uuid_generate_v4(), + MAX({{report_period_id | sqlsafe}}) as report_period_id, + cluster_id, + MAX(cluster_alias) as cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + fd.usage_start as usage_start, + fd.usage_start as usage_end, + product_code, + product_family, + instance_type, + MAX({{bill_id | sqlsafe}}) as cost_entry_bill_id, + usage_account_id, + MAX(account_alias_id) as account_alias_id, + availability_zone, + region, + unit, + SUM(usage_amount) as usage_amount, + 
SUM(fd.infrastructure_data_in_gigabytes) as infrastructure_data_in_gigabytes, + SUM(fd.infrastructure_data_out_gigabytes) as infrastructure_data_out_gigabytes, + data_transfer_direction, + MAX(currency_code) as currency_code, + SUM(unblended_cost) as unblended_cost, + SUM(markup_cost) as markup_cost, + SUM(blended_cost) as blended_cost, + SUM(markup_cost_blended) as markup_cost_blended, + SUM(savingsplan_effective_cost) as savingsplan_effective_cost, + SUM(markup_cost_savingsplan) as markup_cost_savingsplan, + SUM(calculated_amortized_cost) as calculated_amortized_cost, + SUM(markup_cost_amortized) as markup_cost_amortized, + fd.enabled_tags as pod_labels, + fd.enabled_tags as tags, + fd.aws_cost_category as aws_cost_category, + cost_category_id, + {{cloud_provider_uuid}}::uuid as source_uuid +FROM filtered_data as fd +GROUP BY + fd.usage_start, + cluster_id, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + product_code, + product_family, + instance_type, + usage_account_id, + availability_zone, + region, + unit, + data_transfer_direction, + currency_code, + fd.enabled_tags, + fd.aws_cost_category, + cost_category_id +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/reporting_ocpaws_matched_tags.sql b/koku/masu/database/self_hosted_sql/aws/openshift/reporting_ocpaws_matched_tags.sql new file mode 100755 index 0000000000..e7badab217 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/reporting_ocpaws_matched_tags.sql @@ -0,0 +1,64 @@ +WITH cte_enabled_tag_keys AS ( + SELECT array_agg(key) as key_array + FROM ( + SELECT key, + count(provider_type) AS p_count + FROM {{schema | sqlsafe}}.reporting_enabledtagkeys + WHERE enabled = true + AND provider_type IN ('AWS', 'OCP') + GROUP BY key + ) c + WHERE c.p_count > 1 +), +cte_unnested_aws_tags AS ( + SELECT DISTINCT key, + value + FROM {{schema | sqlsafe}}.aws_line_items_daily AS aws + CROSS JOIN LATERAL 
jsonb_each_text(aws.resourcetags::jsonb) AS tags(key, value) + JOIN cte_enabled_tag_keys AS etk + ON EXISTS ( + SELECT 1 + FROM unnest(etk.key_array) AS enabled_key + WHERE strpos(aws.resourcetags, enabled_key) != 0 + ) + WHERE source = {{aws_source_uuid}} + AND year = {{year}} + AND month = {{month}} + AND lineitem_usagestartdate >= {{start_date}} + AND lineitem_usagestartdate < {{end_date}} + INTERVAL '1 day' +), +cte_unnested_ocp_tags AS ( + SELECT DISTINCT pod_key, + pod_value, + volume_key, + volume_value + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging AS ocp + LEFT JOIN LATERAL jsonb_each_text(COALESCE(ocp.pod_labels::jsonb, '{}'::jsonb)) AS pod_tags(pod_key, pod_value) ON TRUE -- LEFT JOIN keeps rows with no pod labels (NULL pod_key/pod_value); a CROSS JOIN would drop them + LEFT JOIN LATERAL jsonb_each_text(COALESCE(ocp.volume_labels::jsonb, '{}'::jsonb)) AS volume_tags(volume_key, volume_value) ON TRUE -- likewise keeps rows with no volume labels + JOIN cte_enabled_tag_keys AS etk + ON EXISTS ( + SELECT 1 + FROM unnest(etk.key_array) AS enabled_key + WHERE strpos(ocp.pod_labels, enabled_key) != 0 + OR strpos(ocp.volume_labels, enabled_key) != 0 + ) + WHERE source IN {{ocp_source_uuids | inclause}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} + AND day IN {{days | inclause}} +) +SELECT '{"' || key || '": "' || value || '"}' as tag +FROM ( + SELECT DISTINCT aws.key, + aws.value + FROM cte_unnested_aws_tags AS aws + JOIN cte_unnested_ocp_tags AS ocp + ON ( + lower(aws.key) = lower(ocp.pod_key) + AND lower(aws.value) = lower(ocp.pod_value) + ) + OR ( + lower(aws.key) = lower(ocp.volume_key) + AND lower(aws.value) = lower(ocp.volume_value) + ) +) AS matches diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_compute_summary_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_compute_summary_p.sql new file mode 100755 index 0000000000..0860dd52f7 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_compute_summary_p.sql @@ -0,0 +1,55 @@ +INSERT INTO {{schema |
sqlsafe}}.reporting_ocpaws_compute_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + usage_account_id, + account_alias_id, + instance_type, + resource_id, + usage_amount, + unit, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + currency_code, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + usage_account_id, + max(account_alias_id), + instance_type, + resource_id, + sum(usage_amount), + max(unit), + sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), + sum(calculated_amortized_cost), + sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + AND instance_type IS NOT NULL + GROUP BY usage_start, usage_account_id, account_alias_id, instance_type, resource_id +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_account_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_account_p.sql new file mode 100755 index 0000000000..10e09ae583 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_account_p.sql @@ -0,0 +1,46 @@ +INSERT INTO {{schema | 
sqlsafe}}.reporting_ocpaws_cost_summary_by_account_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + usage_account_id, + account_alias_id, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + currency_code, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + usage_account_id, + max(account_alias_id), + sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), + sum(calculated_amortized_cost), + sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + GROUP BY usage_start, usage_account_id, account_alias_id +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_region_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_region_p.sql new file mode 100755 index 0000000000..21ab7dc113 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_region_p.sql @@ -0,0 +1,50 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpaws_cost_summary_by_region_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + usage_account_id, + account_alias_id, + region, + availability_zone, + unblended_cost, + markup_cost, + 
blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + currency_code, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + usage_account_id, + max(account_alias_id), + region, + availability_zone, + sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), + sum(calculated_amortized_cost), + sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + GROUP BY usage_start, usage_account_id, account_alias_id, region, availability_zone +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_service_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_service_p.sql new file mode 100755 index 0000000000..76c30d44a8 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_by_service_p.sql @@ -0,0 +1,50 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpaws_cost_summary_by_service_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + usage_account_id, + account_alias_id, + product_code, + product_family, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + 
markup_cost_amortized, + currency_code, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + usage_account_id, + max(account_alias_id), + product_code, + product_family, + sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), + sum(calculated_amortized_cost), + sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + GROUP BY usage_start, usage_account_id, account_alias_id, product_code, product_family +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_p.sql new file mode 100755 index 0000000000..3d325f1451 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_cost_summary_p.sql @@ -0,0 +1,44 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpaws_cost_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + currency_code, + source_uuid, + cost_category_id +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + 
sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), + sum(calculated_amortized_cost), + sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid, + max(cost_category_id) + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast keeps a text-bound end_date from being resolved as interval */ + GROUP BY usage_start +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_database_summary_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_database_summary_p.sql new file mode 100755 index 0000000000..85428bc8c2 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_database_summary_p.sql @@ -0,0 +1,53 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpaws_database_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + usage_account_id, + account_alias_id, + product_code, + usage_amount, + unit, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + currency_code, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + usage_account_id, + max(account_alias_id), + product_code, + sum(usage_amount), + max(unit), + sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), +
sum(calculated_amortized_cost), + sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast keeps a text-bound end_date from being resolved as interval */ + AND product_code IN ('AmazonRDS','AmazonDynamoDB','AmazonElastiCache','AmazonNeptune','AmazonRedshift','AmazonDocumentDB') + GROUP BY usage_start, usage_account_id, account_alias_id, product_code +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_network_summary_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_network_summary_p.sql new file mode 100755 index 0000000000..232449c767 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_network_summary_p.sql @@ -0,0 +1,53 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpaws_network_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + usage_account_id, + account_alias_id, + product_code, + usage_amount, + unit, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + currency_code, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + usage_account_id, + max(account_alias_id), + product_code, + sum(usage_amount), + max(unit), + sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), +
sum(calculated_amortized_cost), + sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast keeps a text-bound end_date from being resolved as interval */ + AND product_code IN ('AmazonVPC','AmazonCloudFront','AmazonRoute53','AmazonAPIGateway') + GROUP BY usage_start, usage_account_id, account_alias_id, product_code +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_storage_summary_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_storage_summary_p.sql new file mode 100755 index 0000000000..2a7daccf14 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpaws_storage_summary_p.sql @@ -0,0 +1,54 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpaws_storage_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + usage_account_id, + account_alias_id, + product_family, + usage_amount, + unit, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + currency_code, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + usage_account_id, + max(account_alias_id), + product_family, + sum(usage_amount), + max(unit), + sum(unblended_cost), + sum(markup_cost), + sum(blended_cost), + sum(markup_cost_blended), + sum(savingsplan_effective_cost), + sum(markup_cost_savingsplan), + sum(calculated_amortized_cost), +
sum(markup_cost_amortized), + max(currency_code), + {{aws_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary + WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast keeps a text-bound end_date from being resolved as interval */ + AND product_family LIKE '%%Storage%%' + AND unit = 'GB-Mo' + GROUP BY usage_start, usage_account_id, account_alias_id, product_family +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpawscostlineitem_project_daily_summary_p.sql b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpawscostlineitem_project_daily_summary_p.sql new file mode 100755 index 0000000000..bbb36bd9e6 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/openshift/ui_summary/reporting_ocpawscostlineitem_project_daily_summary_p.sql @@ -0,0 +1,104 @@ +-- insert managed table data into postgres table + +INSERT INTO {{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary_p ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + cost_entry_bill_id, + usage_account_id, + account_alias_id, + availability_zone, + region, + unit, + usage_amount, + infrastructure_data_in_gigabytes, + infrastructure_data_out_gigabytes, + data_transfer_direction, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + pod_labels, + tags, + aws_cost_category, + cost_category_id, + source_uuid +) +with
cte_pg_enabled_keys as ( + select array['vm_kubevirt_io_name'] || array_agg(key order by key) as keys + from {{schema | sqlsafe}}.reporting_enabledtagkeys + where enabled = true + and provider_type IN ('AWS', 'OCP') +) +SELECT uuid_generate_v4(), + {{report_period_id | sqlsafe}} as report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + date(usage_start), + date(usage_end), + product_code, + product_family, + instance_type, + {{bill_id | sqlsafe}} as cost_entry_bill_id, + usage_account_id, + account_alias_id, + availability_zone, + region, + unit, + usage_amount, + CASE + WHEN upper(data_transfer_direction) = 'IN' THEN usage_amount + ELSE 0 + END AS infrastructure_data_in_gigabytes, + CASE + WHEN upper(data_transfer_direction) = 'OUT' THEN usage_amount + ELSE 0 + END AS infrastructure_data_out_gigabytes, + data_transfer_direction, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + pod_labels::jsonb, + (SELECT json_object_agg(key, value) FROM jsonb_each_text(tags::jsonb) WHERE key = ANY(pek.keys))::jsonb AS tags, + aws_cost_category::jsonb, + cost_category_id, + source::UUID +FROM {{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary +CROSS JOIN cte_pg_enabled_keys AS pek +WHERE source = {{aws_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day IN {{days | inclause}} +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_daily_summary.sql b/koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_daily_summary.sql new file mode 100755 index 0000000000..51f597dcac --- /dev/null +++ 
b/koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_daily_summary.sql @@ -0,0 +1,152 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_awscostentrylineitem_daily_summary ( + uuid, + cost_entry_bill_id, + usage_start, + usage_end, + usage_account_id, + product_code, + product_family, + availability_zone, + region, + instance_type, + unit, + resource_ids, + resource_count, + usage_amount, + normalization_factor, + normalized_usage_amount, + currency_code, + unblended_rate, + unblended_cost, + blended_rate, + blended_cost, + savingsplan_effective_cost, + calculated_amortized_cost, + public_on_demand_cost, + public_on_demand_rate, + tags, + cost_category, + account_alias_id, + organizational_unit_id, + source_uuid, + markup_cost, + markup_cost_blended, + markup_cost_savingsplan, + markup_cost_amortized +) +with cte_pg_enabled_keys as ( + select array_agg(key order by key) as keys + from {{schema | sqlsafe}}.reporting_enabledtagkeys + where enabled = true + and provider_type = 'AWS' +) +SELECT uuid_generate_v4() as uuid, + INTEGER '{{bill_id | sqlsafe}}' as cost_entry_bill_id, + usage_start, + usage_end, + cast(usage_account_id AS varchar(50)), + cast(product_code AS varchar), + product_family, + cast(availability_zone AS varchar(50)), + region, + instance_type, + unit, + resource_ids, + resource_count, + cast(usage_amount AS decimal(24,9)), + normalization_factor, + normalized_usage_amount, + cast(currency_code AS varchar(10)), + cast(unblended_rate AS decimal(24,9)), + cast(unblended_cost AS decimal(24,9)), + cast(blended_rate AS decimal(24,9)), + cast(blended_cost AS decimal(24,9)), + cast(savingsplan_effective_cost AS decimal(24,9)), + cast(calculated_amortized_cost AS decimal(33, 9)), + cast(public_on_demand_cost AS decimal(24,9)), + cast(public_on_demand_rate AS decimal(24,9)), + (SELECT json_object_agg(key, value) FROM jsonb_each_text(tags::jsonb) WHERE key = ANY(pek.keys))::jsonb as tags, + costcategory::jsonb as cost_category, + aa.id as 
account_alias_id, + ou.id as organizational_unit_id, + '{{source_uuid | sqlsafe}}'::uuid as source_uuid, + cast(unblended_cost * {{markup | sqlsafe}} AS decimal(24,9)) as markup_cost, + cast(blended_cost * {{markup | sqlsafe}} AS decimal(33,15)) as markup_cost_blended, + cast(savingsplan_effective_cost * {{markup | sqlsafe}} AS decimal(33,15)) as markup_cost_savingsplan, + cast(calculated_amortized_cost * {{markup | sqlsafe}} AS decimal(33,9)) as markup_cost_amortized +FROM ( + SELECT date(lineitem_usagestartdate) as usage_start, + date(lineitem_usagestartdate) as usage_end, + CASE + WHEN bill_billingentity='AWS Marketplace' THEN coalesce(nullif(product_productname, ''), nullif(lineitem_productcode, '')) + ELSE nullif(lineitem_productcode, '') + END as product_code, + nullif(product_productfamily, '') as product_family, + lineitem_usageaccountid as usage_account_id, + nullif(lineitem_availabilityzone, '') as availability_zone, + nullif(product_region, '') as region, + resourcetags as tags, + costcategory, + nullif(product_instancetype, '') as instance_type, + nullif(pricing_unit, '') as unit, + -- SavingsPlanNegation needs to be negated to prevent duplicate usage COST-5369 + sum( + CASE + WHEN lineitem_lineitemtype='SavingsPlanNegation' + THEN 0.0 + ELSE lineitem_usageamount + END + ) as usage_amount, + max(lineitem_normalizationfactor) as normalization_factor, + sum(lineitem_normalizedusageamount) as normalized_usage_amount, + max(lineitem_currencycode) as currency_code, + max(lineitem_unblendedrate) as unblended_rate, + sum(lineitem_unblendedcost) as unblended_cost, + max(lineitem_blendedrate) as blended_rate, + sum(lineitem_blendedcost) as blended_cost, + sum(savingsplan_savingsplaneffectivecost) as savingsplan_effective_cost, + sum( + CASE + WHEN lineitem_lineitemtype='SavingsPlanCoveredUsage' + OR lineitem_lineitemtype='SavingsPlanNegation' + OR lineitem_lineitemtype='SavingsPlanUpfrontFee' + OR lineitem_lineitemtype='SavingsPlanRecurringFee' + THEN 
savingsplan_savingsplaneffectivecost + ELSE lineitem_unblendedcost + END + ) as calculated_amortized_cost, + sum(pricing_publicondemandcost) as public_on_demand_cost, + max(pricing_publicondemandrate) as public_on_demand_rate, + array_agg(DISTINCT lineitem_resourceid) as resource_ids, + count(DISTINCT lineitem_resourceid) as resource_count + FROM {{schema | sqlsafe}}.aws_line_items_daily + WHERE source = '{{source_uuid | sqlsafe}}' + AND year = '{{year | sqlsafe}}' + AND month = '{{month | sqlsafe}}' + AND lineitem_usagestartdate >= '{{start_date | sqlsafe}}'::timestamp + AND lineitem_usagestartdate < '{{end_date | sqlsafe}}'::timestamp + INTERVAL '1 day' + GROUP BY date(lineitem_usagestartdate), + bill_billingentity, + lineitem_productcode, + product_productname, + lineitem_usageaccountid, + lineitem_availabilityzone, + product_productfamily, + product_region, + resourcetags, + costcategory, + product_instancetype, + pricing_unit +) AS ds +CROSS JOIN cte_pg_enabled_keys AS pek +LEFT JOIN {{schema | sqlsafe}}.reporting_awsaccountalias AS aa + ON ds.usage_account_id = aa.account_id +LEFT JOIN {{schema | sqlsafe}}.reporting_awsorganizationalunit AS ou + ON aa.id = ou.account_alias_id + AND ou.provider_id = '{{source_uuid | sqlsafe}}'::uuid + AND ou.created_timestamp <= ds.usage_start + AND ( + ou.deleted_timestamp is NULL + OR ou.deleted_timestamp > ds.usage_start + ) +RETURNING 1 diff --git a/koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_summary_by_ec2_compute_p.sql b/koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_summary_by_ec2_compute_p.sql new file mode 100755 index 0000000000..39890603b0 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/reporting_awscostentrylineitem_summary_by_ec2_compute_p.sql @@ -0,0 +1,177 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_awscostentrylineitem_summary_by_ec2_compute_p ( + uuid, + usage_start, + usage_end, + usage_account_id, + resource_id, + instance_name, + instance_type, 
+ operating_system, + region, + vcpu, + memory, + tags, + cost_category, + unit, + usage_amount, + normalization_factor, + normalized_usage_amount, + currency_code, + unblended_rate, + unblended_cost, + markup_cost, + blended_rate, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + public_on_demand_cost, + public_on_demand_rate, + source_uuid, + cost_entry_bill_id, + account_alias_id +) +with cte_pg_enabled_keys as ( + select array_agg(key order by key) as keys + from {{schema | sqlsafe}}.reporting_enabledtagkeys + where enabled = true + and provider_type = 'AWS' +), +cte_latest_values as ( + SELECT + lineitem_resourceid as resource_id, + nullif(product_instancetype, '') as instance_type, + max(resourcetags::jsonb->>'Name') AS instance_name, + resourcetags as tags, + costcategory as cost_category, + nullif(product_memory, '') as memory, + cast(nullif(product_vcpu, '') AS INTEGER) as vcpu + FROM {{schema | sqlsafe}}.aws_line_items_daily as alid + WHERE source = '{{source_uuid | sqlsafe}}' + AND year = '{{year | sqlsafe}}' + AND month = '{{month | sqlsafe}}' + AND lineitem_productcode = 'AmazonEC2' + AND product_productfamily LIKE '%%Compute Instance%%' + AND lineitem_resourceid != '' + AND lineitem_usagestartdate = ( + SELECT max(date(lv.lineitem_usagestartdate)) AS usage_start + FROM {{schema | sqlsafe}}.aws_line_items_daily AS lv + WHERE lineitem_resourceid = alid.lineitem_resourceid + AND year = '{{year | sqlsafe}}' + AND month = '{{month | sqlsafe}}' + AND source = '{{source_uuid | sqlsafe}}' + ) + GROUP BY + lineitem_resourceid, + product_instancetype, + resourcetags, + costcategory, + product_memory, + product_vcpu +) + +SELECT uuid_generate_v4() as uuid, + usage_start, + usage_end, + cast(usage_account_id AS varchar(50)), + cte_l.resource_id, + cte_l.instance_name, + cte_l.instance_type, + operating_system, + region, + cte_l.vcpu, + cte_l.memory, + (SELECT 
json_object_agg(key, value) FROM jsonb_each_text(cte_l.tags::jsonb) WHERE key = ANY(pek.keys))::jsonb as tags, + cte_l.cost_category::jsonb as cost_category, + unit, + cast(usage_amount as decimal(24,9)) as usage_amount, + normalization_factor, + normalized_usage_amount, + cast(currency_code AS varchar(10)), + cast(unblended_rate AS decimal(24,9)), + cast(unblended_cost AS decimal(24,9)), + cast(unblended_cost * {{markup | sqlsafe}} AS decimal(24,9)) as markup_cost, + cast(blended_rate AS decimal(24,9)), + cast(blended_cost AS decimal(24,9)), + cast(blended_cost * {{markup | sqlsafe}} AS decimal(33,15)) as markup_cost_blended, + cast(savingsplan_effective_cost AS decimal(24,9)), + cast(savingsplan_effective_cost * {{markup | sqlsafe}} AS decimal(33,15)) as markup_cost_savingsplan, + cast(calculated_amortized_cost AS decimal(33, 9)), + cast(calculated_amortized_cost * {{markup | sqlsafe}} AS decimal(33,9)) as markup_cost_amortized, + cast(public_on_demand_cost AS decimal(24,9)), + cast(public_on_demand_rate AS decimal(24,9)), + '{{source_uuid | sqlsafe}}'::uuid as source_uuid, + INTEGER '{{bill_id | sqlsafe}}' as cost_entry_bill_id, + aa.id as account_alias_id +FROM ( + SELECT min(date(lineitem_usagestartdate)) as usage_start, + max(date(lineitem_usagestartdate)) as usage_end, + max(lineitem_usageaccountid) as usage_account_id, + lineitem_resourceid as resource_id, + max(nullif(product_operatingsystem, '')) as operating_system, + max(nullif(product_region, '')) as region, + max(nullif(pricing_unit, '')) as unit, + -- SavingsPlanNegation needs to be negated to prevent duplicate usage COST-5369 + sum( + CASE + WHEN lineitem_lineitemtype='SavingsPlanNegation' + THEN 0.0 + ELSE lineitem_usageamount + END + ) as usage_amount, + max(lineitem_normalizationfactor) as normalization_factor, + sum(lineitem_normalizedusageamount) as normalized_usage_amount, + max(lineitem_currencycode) as currency_code, + max(lineitem_unblendedrate) as unblended_rate, + /* 
SavingsPlanCoveredUsage entries have corresponding SavingsPlanNegation line items + that offset that cost. + https://docs.aws.amazon.com/cur/latest/userguide/cur-sp.html + */ + sum( + CASE + WHEN lineitem_lineitemtype='SavingsPlanCoveredUsage' + THEN 0.0 + ELSE lineitem_unblendedcost + END + ) as unblended_cost, + max(lineitem_blendedrate) as blended_rate, + /* SavingsPlanCoveredUsage entries have corresponding SavingsPlanNegation line items + that offset that cost. + https://docs.aws.amazon.com/cur/latest/userguide/cur-sp.html + */ + sum( + CASE + WHEN lineitem_lineitemtype='SavingsPlanCoveredUsage' + THEN 0.0 + ELSE lineitem_blendedcost + END + ) as blended_cost, + sum(savingsplan_savingsplaneffectivecost) as savingsplan_effective_cost, + sum( + CASE + WHEN lineitem_lineitemtype='SavingsPlanCoveredUsage' + OR lineitem_lineitemtype='SavingsPlanNegation' + OR lineitem_lineitemtype='SavingsPlanUpfrontFee' + OR lineitem_lineitemtype='SavingsPlanRecurringFee' + THEN savingsplan_savingsplaneffectivecost + ELSE lineitem_unblendedcost + END + ) as calculated_amortized_cost, + sum(pricing_publicondemandcost) as public_on_demand_cost, + max(pricing_publicondemandrate) as public_on_demand_rate + FROM {{schema | sqlsafe}}.aws_line_items_daily as lid + WHERE source = '{{source_uuid | sqlsafe}}' + AND year = '{{year | sqlsafe}}' + AND month = '{{month | sqlsafe}}' + AND lineitem_productcode = 'AmazonEC2' + AND product_productfamily LIKE '%%Compute Instance%%' + AND lineitem_resourceid != '' + GROUP BY lineitem_resourceid +) AS ds +CROSS JOIN cte_pg_enabled_keys AS pek +JOIN cte_latest_values AS cte_l ON ds.resource_id = cte_l.resource_id +LEFT JOIN {{schema | sqlsafe}}.reporting_awsaccountalias AS aa + ON ds.usage_account_id = aa.account_id +RETURNING 1 diff --git a/koku/masu/database/self_hosted_sql/aws/reporting_ocpinfrastructure_provider_map.sql b/koku/masu/database/self_hosted_sql/aws/reporting_ocpinfrastructure_provider_map.sql new file mode 100755 index 
0000000000..b364350b01 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/aws/reporting_ocpinfrastructure_provider_map.sql @@ -0,0 +1,64 @@ + +WITH cte_aws_resource_ids AS ( + SELECT DISTINCT lineitem_resourceid, + aws.source + FROM {{schema | sqlsafe}}.aws_line_items_daily AS aws + WHERE aws.lineitem_usagestartdate >= {{start_date}} + AND aws.lineitem_usagestartdate < {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast keeps a text-bound end_date from being resolved as interval */ + AND aws.lineitem_resourceid IS NOT NULL + AND aws.lineitem_resourceid != '' + {% if aws_provider_uuid -%} + AND aws.source = {{aws_provider_uuid}} + {% endif -%} + AND aws.year = {{year}} + AND aws.month = {{month}} +), +cte_ocp_resource_ids AS ( +{% if ocp_provider_uuid -%} + SELECT DISTINCT resource_id, + ocp.source + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily AS ocp + WHERE ocp.interval_start >= {{start_date}} + AND ocp.interval_start < {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast, as above */ + AND ocp.resource_id IS NOT NULL + AND ocp.resource_id != '' + AND ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND ocp.month = {{month}} +{% else -%} + SELECT DISTINCT resource_id, + ocp.source + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily AS ocp + INNER JOIN public.api_provider as provider + on ocp.source = provider.uuid::varchar + WHERE ocp.interval_start >= {{start_date}} + AND ocp.interval_start < {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast, as above */ + AND ocp.resource_id IS NOT NULL + AND ocp.resource_id != '' + AND ocp.year = {{year}} + AND ocp.month = {{month}} + AND provider.type = 'OCP' + and provider.infrastructure_id IS NULL +{% endif -%} +) + +SELECT DISTINCT ocp.source as ocp_uuid, + aws.source as infra_uuid, + api_provider.type as type +FROM cte_aws_resource_ids AS aws +JOIN cte_ocp_resource_ids AS ocp + ON strpos(aws.lineitem_resourceid, ocp.resource_id) != 0 +JOIN {{schema | sqlsafe}}.reporting_tenant_api_provider as api_provider + ON aws.source = api_provider.uuid::varchar + +{% if aws_provider_uuid -%} +UNION + +SELECT uuid::varchar, +
{{aws_provider_uuid}}, + infra_uuid.infrastructure_type +FROM public.api_provider AS provider_union +JOIN public.api_providerinfrastructuremap AS infra_uuid + ON provider_union.infrastructure_id = infra_uuid.id +WHERE infrastructure_provider_id::varchar = {{aws_provider_uuid}} +{% endif -%} diff --git a/koku/masu/database/self_hosted_sql/openshift/ocp_special_matched_tags.sql b/koku/masu/database/self_hosted_sql/openshift/ocp_special_matched_tags.sql new file mode 100644 index 0000000000..e36df9f7d6 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/openshift/ocp_special_matched_tags.sql @@ -0,0 +1,44 @@ +WITH cte_array_agg_nodes AS ( + SELECT DISTINCT node + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily + WHERE source = {{ocp_provider_uuid}} + AND year = {{year}} + AND month = {{month}} + AND interval_start >= {{start_date}} + AND interval_start < {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast keeps a text-bound end_date from being resolved as interval */ +), +cte_cluster_info as ( + select + '"openshift_cluster": "' || (auth.credentials->>'cluster_id') || '"' AS cluster_id, + '"openshift_cluster": "' || provider.name || '"' as cluster_alias + from public.api_provider as provider + inner join public.api_providerauthentication as auth + ON provider.authentication_id = auth.id + and provider.uuid = {{ocp_provider_uuid}}::uuid +), +cte_tag_matches AS ( + SELECT * FROM unnest(CAST(ARRAY{{matched_tag_strs | sqlsafe}} AS VARCHAR[])) as t(matched_tag) + + UNION + + SELECT cluster_alias from cte_cluster_info + + UNION + + SELECT cluster_id from cte_cluster_info + + UNION + + SELECT '"openshift_node": "' || node || '"' AS matched_tag from cte_array_agg_nodes + + UNION + + SELECT distinct '"openshift_project": "' || namespace || '"' + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily + WHERE source = {{ocp_provider_uuid}} + AND month = {{month}} + AND year = {{year}} + AND interval_start >= {{start_date}} + AND interval_start < {{end_date}}::timestamp + INTERVAL '1 day' /* explicit cast, as above */ +) +SELECT array_agg(matched_tag) as matched_tags from
cte_tag_matches; diff --git a/koku/masu/processor/aws/aws_report_parquet_processor.py b/koku/masu/processor/aws/aws_report_parquet_processor.py index 227c933f78..4a33aedae3 100755 --- a/koku/masu/processor/aws/aws_report_parquet_processor.py +++ b/koku/masu/processor/aws/aws_report_parquet_processor.py @@ -14,10 +14,21 @@ from reporting.provider.aws.models import TRINO_LINE_ITEM_DAILY_TABLE from reporting.provider.aws.models import TRINO_LINE_ITEM_TABLE from reporting.provider.aws.models import TRINO_OCP_ON_AWS_DAILY_TABLE +from reporting.provider.aws.self_hosted_models import SELF_HOSTED_DAILY_MODEL_MAP +from reporting.provider.aws.self_hosted_models import SELF_HOSTED_MODEL_MAP class AWSReportParquetProcessor(ReportParquetProcessorBase): def __init__(self, manifest_id, account, s3_path, provider_uuid, start_date): + # Track if this is daily data for self-hosted model lookup + if "daily" in s3_path: + self._is_daily = True + else: + self._is_daily = False + + # Date column for deriving usage_start (AWS uses lineitem_usagestartdate) + self._date_column = "lineitem_usagestartdate" + numeric_columns = [ "lineitem_normalizationfactor", "lineitem_normalizedusageamount", @@ -65,6 +76,30 @@ def postgres_summary_table(self): """Return the mode for the source specific summary table.""" return AWSCostEntryLineItemDailySummary + @property + def self_hosted_line_item_model(self): + """Return the Django model for line item data (self-hosted/on-prem only). + + This leverages Django models instead of raw SQL table creation, + enabling automatic migrations and consistent partition management. + """ + # AWS uses a single table type key unlike OCP which has pod_usage, storage_usage, etc. 
+ table_key = "aws_line_items" + if self._is_daily: + return SELF_HOSTED_DAILY_MODEL_MAP.get(table_key) + else: + return SELF_HOSTED_MODEL_MAP.get(table_key) + + def get_table_names_for_delete(self): + """Return all AWS table names (raw, daily, ocp_on_aws).""" + from masu.util.aws.common import get_table_names_for_delete + + return get_table_names_for_delete("AWS") + + def _prepare_dataframe_for_write(self, data_frame, metadata): + """Add AWS-specific columns before writing to PostgreSQL.""" + data_frame["manifestid"] = str(self._manifest_id) + def create_bill(self, bill_date): """Create bill postgres entry.""" if isinstance(bill_date, str): diff --git a/koku/masu/processor/ocp/ocp_report_parquet_processor.py b/koku/masu/processor/ocp/ocp_report_parquet_processor.py index ec5c940772..2b25e0bdef 100755 --- a/koku/masu/processor/ocp/ocp_report_parquet_processor.py +++ b/koku/masu/processor/ocp/ocp_report_parquet_processor.py @@ -35,6 +35,9 @@ def __init__(self, manifest_id, account, s3_path, provider_uuid, report_type, st self._report_type = report_type + # Date column for deriving usage_start (OCP uses interval_start) + self._date_column = "interval_start" + numeric_columns = [ "pod_usage_cpu_core_seconds", "pod_request_cpu_core_seconds", @@ -110,7 +113,7 @@ def get_table_names_for_delete(self): daily_table_name = TRINO_LINE_ITEM_TABLE_DAILY_MAP[self._report_type] return [raw_table_name, daily_table_name] - def delete_day_postgres(self, start_date, reportnumhours=None): + def delete_old_data_postgres(self, start_date, reportnumhours=None): """Delete old data for a specific day (OCP implementation with reportnumhours check). Deletes from both raw and daily tables, similar to how Trino deletes from multiple S3 paths. 
@@ -135,7 +138,7 @@ def delete_day_postgres(self, start_date, reportnumhours=None): # Delete from existing tables total_deleted = 0 for table_name in existing_tables: - delete_sql = get_report_db_accessor().get_delete_day_by_reportnumhours_sql( + delete_sql = get_report_db_accessor().get_delete_by_reportnumhours_sql( self._schema_name, table_name, self._provider_uuid, @@ -227,59 +230,7 @@ def create_bill(self, bill_date): bill.cluster_alias = cluster_alias bill.save(update_fields=["cluster_alias"]) - def write_to_self_hosted_table(self, data_frame, metadata): - """Write dataframe to PostgreSQL for on-prem using Django model infrastructure. - - This method is only called for on-prem deployments. SaaS writes to S3 parquet files instead. - Uses the standard partition naming convention (tablename_YYYY_MM) and the existing - partition infrastructure (get_or_create_postgres_partition). - """ - import pandas as pd - from uuid import uuid4 - - from sqlalchemy import create_engine - - from koku.reportdb_accessor import get_report_db_accessor - + def _prepare_dataframe_for_write(self, data_frame, metadata): + """Add OCP-specific columns before writing to PostgreSQL.""" + data_frame["manifestid"] = str(self._manifest_id) data_frame["reportnumhours"] = metadata["ReportNumHours"] - - model = self.self_hosted_line_item_model - if not model: - raise NotImplementedError( - f"No Django model found for OCP report type '{self._report_type}'. " - "On-prem requires Django models for all supported report types." - ) - - # Ensure partitions exist using the standard infrastructure - # This is the same partitioning structure we utilize in the SaaS for - # our postgresql summary tables. 
- self.get_or_create_postgres_partition(self._start_date, model=model) - - table_name = model._meta.db_table - - # Add partition tracking columns - data_frame["year"] = self._year - data_frame["month"] = self._month - data_frame["source"] = str(self._provider_uuid) # Store as string for SQL join compatibility - - # Add usage_start as date (derived from interval_start) for partition column - # PostgreSQL uses this to route rows to the correct partition automatically - if "interval_start" in data_frame.columns: - data_frame["usage_start"] = pd.to_datetime(data_frame["interval_start"]).dt.date - - # Generate UUIDs for each row (required for partitioned tables) - data_frame["id"] = [uuid4() for _ in range(len(data_frame))] - - # Write to the parent table - PostgreSQL routes to correct partition based on usage_start - with get_report_db_accessor().connect() as connection: - engine = create_engine("postgresql://", creator=lambda: connection.getConnection()) - data_frame.to_sql(name=table_name, con=engine, schema=self._schema_name, if_exists="append", index=False) - - LOG.info( - log_json( - msg="wrote dataframe to postgresql", - schema=self._schema_name, - table=table_name, - rows=len(data_frame), - ) - ) diff --git a/koku/masu/processor/parquet/parquet_report_processor.py b/koku/masu/processor/parquet/parquet_report_processor.py index a493bffc0d..bb9aa6a4b1 100644 --- a/koku/masu/processor/parquet/parquet_report_processor.py +++ b/koku/masu/processor/parquet/parquet_report_processor.py @@ -701,7 +701,7 @@ def _delete_old_data_postgres(self, filename): # Processor handles deleting from all relevant tables (raw and daily for OCP) processor = self._get_report_processor(daily=False) - processor.delete_day_postgres(self.start_date, reportnumhours) + processor.delete_old_data_postgres(self.start_date, reportnumhours) def _delete_old_data_trino(self, filename): metadata_key, metadata_value = self.get_metadata_kv(filename.stem) diff --git 
a/koku/masu/processor/report_parquet_processor_base.py b/koku/masu/processor/report_parquet_processor_base.py index ddfbddf3d0..5d8fa8ce1d 100755 --- a/koku/masu/processor/report_parquet_processor_base.py +++ b/koku/masu/processor/report_parquet_processor_base.py @@ -219,14 +219,127 @@ def sync_hive_partitions(self): LOG.info(sql) self._execute_trino_sql(sql, self._schema_name) - def write_to_self_hosted_table(self, data_frame, metadata): - """Write dataframe to self-hosted PostgreSQL table. + @property + def self_hosted_line_item_model(self): + """Return the Django model for line item data (self-hosted/on-prem only). + + Subclasses must override this property to return the appropriate model. + """ + return None + + def _prepare_dataframe_for_write(self, data_frame, metadata): + """Hook for subclasses to add provider-specific columns before writing. - This base implementation is a no-op. Subclasses that support on-prem - should override this method to write data to PostgreSQL. - For SaaS, data is written to S3 parquet files instead. + Subclasses should override this to add tracking columns like: + - OCP: manifestid, reportnumhours + - AWS/Azure/GCP: manifestid """ raise NotImplementedError( - f"{self.__class__.__name__} does not implement write_to_self_hosted_table. " - "On-prem is only supported for OCP providers." + f"{self.__class__.__name__} must implement _prepare_dataframe_for_write. " + "Add provider-specific columns (e.g., manifestid, reportnumhours) here." + ) + + def write_to_self_hosted_table(self, data_frame, metadata): + """Write dataframe to PostgreSQL for on-prem using Django model infrastructure. + + This method is only called for on-prem deployments. SaaS writes to S3 parquet files instead. + Uses the standard partition naming convention (tablename_YYYY_MM) and the existing + partition infrastructure (get_or_create_postgres_partition). 
+ + Requires subclass to set: + - self._date_column: Column name to derive usage_start from (e.g., 'interval_start', 'lineitem_usagestartdate') + - self.self_hosted_line_item_model: Property returning Django model for the line item table + - _prepare_dataframe_for_write(): Method to add provider-specific columns + """ + import pandas as pd + from uuid import uuid4 + + from sqlalchemy import create_engine + + model = self.self_hosted_line_item_model + if not model: + raise NotImplementedError( + f"{self.__class__.__name__} does not have a self_hosted_line_item_model. " + "On-prem requires Django models for all supported providers." + ) + + # Let subclass add provider-specific columns (manifestid, reportnumhours, etc.) + self._prepare_dataframe_for_write(data_frame, metadata) + + # Ensure partitions exist using the standard infrastructure + self.get_or_create_postgres_partition(self._start_date, model=model) + + table_name = model._meta.db_table + + # Add partition tracking columns + data_frame["year"] = self._year + data_frame["month"] = self._month + data_frame["source"] = str(self._provider_uuid) + + # Add usage_start as date (derived from _date_column) for partition column + # PostgreSQL uses this to route rows to the correct partition automatically + date_column = getattr(self, "_date_column", None) + if date_column and date_column in data_frame.columns: + data_frame["usage_start"] = pd.to_datetime(data_frame[date_column]).dt.date + + # Generate UUIDs for each row (required for partitioned tables) + data_frame["id"] = [uuid4() for _ in range(len(data_frame))] + + # Write to the parent table - PostgreSQL routes to correct partition based on usage_start + with get_report_db_accessor().connect() as connection: + engine = create_engine("postgresql://", creator=lambda: connection.getConnection()) + data_frame.to_sql(name=table_name, con=engine, schema=self._schema_name, if_exists="append", index=False) + + LOG.info( + log_json( + msg="wrote dataframe to postgresql", 
+ schema=self._schema_name, + table=table_name, + rows=len(data_frame), + ) + ) + + def get_table_names_for_delete(self): + """Return list of table names to delete from. Override in subclass if needed.""" + return [self._table_name] + + def delete_old_data_postgres(self, start_date, reportnumhours=None): + """Delete old data for this source/year/month (non-OCP). + + Uses manifestid-based deletion for AWS/Azure/GCP providers. + OCP overrides this method to use reportnumhours-based deletion. + """ + # Get all table names to delete from (may include daily tables) + table_names = self.get_table_names_for_delete() + + # Filter to only existing tables + existing_tables = [] + for table_name in table_names: + check_table_sql = get_report_db_accessor().get_table_check_sql(table_name, self._schema_name) + + with get_report_db_accessor().connect() as conn: + with conn.cursor() as cursor: + cursor.execute(check_table_sql) + if cursor.fetchone(): + existing_tables.append(table_name) + else: + LOG.debug(log_json(msg="table does not exist, skipping delete", table=table_name)) + + # Delete from existing tables + total_deleted = 0 + for table_name in existing_tables: + delete_sql = get_report_db_accessor().get_delete_by_manifestid_sql( + self._schema_name, table_name, self._provider_uuid, self._year, self._month, str(self._manifest_id) + ) + + with get_report_db_accessor().connect(schema=self._schema_name) as conn: + with conn.cursor() as cursor: + cursor.execute(delete_sql) + total_deleted += cursor.rowcount + + LOG.info( + log_json( + msg="deleted old data from postgres", + deleted_rows=total_deleted, + ) ) diff --git a/koku/masu/test/database/test_aws_report_db_accessor.py b/koku/masu/test/database/test_aws_report_db_accessor.py index f6f62f9c53..94a1a0c2cd 100644 --- a/koku/masu/test/database/test_aws_report_db_accessor.py +++ b/koku/masu/test/database/test_aws_report_db_accessor.py @@ -532,3 +532,40 @@ def test_get_matched_tags_strings_trino_disabled(self, mock_postgres_tags, 
mock_ 1, self.aws_provider_uuid, self.ocp_provider_uuid, "2022-04-01", "2022-04-10" ) self.assertEqual([], result) + + @patch("reporting.provider.aws.self_hosted_models.get_self_hosted_models") + def test_delete_self_hosted_data_by_source(self, mock_get_models): + """Test delete_self_hosted_data_by_source deletes data for provider.""" + from unittest.mock import MagicMock + + # Create mock models + mock_model1 = MagicMock() + mock_model1._meta.db_table = "aws_line_items" + mock_model1.objects.filter.return_value.delete.return_value = (5, {}) + + mock_model2 = MagicMock() + mock_model2._meta.db_table = "aws_line_items_daily" + mock_model2.objects.filter.return_value.delete.return_value = (3, {}) + + mock_get_models.return_value = [mock_model1, mock_model2] + + total_deleted = self.accessor.delete_self_hosted_data_by_source(self.aws_provider_uuid) + + self.assertEqual(total_deleted, 8) + mock_model1.objects.filter.assert_called_once_with(source=str(self.aws_provider_uuid)) + mock_model2.objects.filter.assert_called_once_with(source=str(self.aws_provider_uuid)) + + @patch("reporting.provider.aws.self_hosted_models.get_self_hosted_models") + def test_delete_self_hosted_data_by_source_no_data(self, mock_get_models): + """Test delete_self_hosted_data_by_source returns 0 when no data.""" + from unittest.mock import MagicMock + + mock_model = MagicMock() + mock_model._meta.db_table = "aws_line_items" + mock_model.objects.filter.return_value.delete.return_value = (0, {}) + + mock_get_models.return_value = [mock_model] + + total_deleted = self.accessor.delete_self_hosted_data_by_source(self.aws_provider_uuid) + + self.assertEqual(total_deleted, 0) diff --git a/koku/masu/test/processor/aws/test_aws_report_parquet_processor.py b/koku/masu/test/processor/aws/test_aws_report_parquet_processor.py index 8ef3c0a1ad..61aed6c8f7 100644 --- a/koku/masu/test/processor/aws/test_aws_report_parquet_processor.py +++ b/koku/masu/test/processor/aws/test_aws_report_parquet_processor.py @@ 
-6,6 +6,7 @@ from datetime import date from unittest.mock import patch +import pandas as pd from django_tenants.utils import schema_context from api.common import log_json @@ -18,6 +19,8 @@ from reporting.provider.aws.models import TRINO_LINE_ITEM_DAILY_TABLE from reporting.provider.aws.models import TRINO_LINE_ITEM_TABLE from reporting.provider.aws.models import TRINO_OCP_ON_AWS_DAILY_TABLE +from reporting.provider.aws.self_hosted_models import SELF_HOSTED_DAILY_MODEL_MAP +from reporting.provider.aws.self_hosted_models import SELF_HOSTED_MODEL_MAP class AWSReportProcessorParquetTest(MasuTestCase): @@ -108,3 +111,92 @@ def test_get_or_create_postgres_partition(self): with schema_context(self.schema): self.assertNotEqual(PartitionedTable.objects.filter(table_name=table_name).count(), 0) + + def test_is_daily_flag(self): + """Test that _is_daily is set correctly based on s3_path.""" + # Non-daily path + processor = AWSReportParquetProcessor( + self.manifest_id, self.account, "/s3/path", self.aws_provider_uuid, self.start_date + ) + self.assertFalse(processor._is_daily) + + # Daily path + processor_daily = AWSReportParquetProcessor( + self.manifest_id, self.account, "/s3/path/daily", self.aws_provider_uuid, self.start_date + ) + self.assertTrue(processor_daily._is_daily) + + def test_self_hosted_line_item_model(self): + """Test that self_hosted_line_item_model returns correct model.""" + # Non-daily processor + processor = AWSReportParquetProcessor( + self.manifest_id, self.account, "/s3/path", self.aws_provider_uuid, self.start_date + ) + self.assertEqual(processor.self_hosted_line_item_model, SELF_HOSTED_MODEL_MAP.get("aws_line_items")) + + # Daily processor + processor_daily = AWSReportParquetProcessor( + self.manifest_id, self.account, "/s3/path/daily", self.aws_provider_uuid, self.start_date + ) + self.assertEqual( + processor_daily.self_hosted_line_item_model, SELF_HOSTED_DAILY_MODEL_MAP.get("aws_line_items") + ) + + def test_get_table_names_for_delete(self): + 
"""Test that all AWS table names are returned.""" + table_names = self.processor.get_table_names_for_delete() + self.assertEqual(len(table_names), 3) + self.assertIn(TRINO_LINE_ITEM_TABLE, table_names) + self.assertIn(TRINO_LINE_ITEM_DAILY_TABLE, table_names) + self.assertIn(TRINO_OCP_ON_AWS_DAILY_TABLE, table_names) + + def test_prepare_dataframe_for_write(self): + """Test that manifestid is added to dataframe.""" + data_frame = pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + metadata = {} + self.processor._prepare_dataframe_for_write(data_frame, metadata) + self.assertIn("manifestid", data_frame.columns) + self.assertEqual(data_frame["manifestid"].iloc[0], str(self.manifest_id)) + + @patch("masu.processor.report_parquet_processor_base.get_report_db_accessor") + @patch( + "masu.processor.aws.aws_report_parquet_processor.AWSReportParquetProcessor.get_or_create_postgres_partition" + ) + def test_write_to_self_hosted_table(self, mock_partition, mock_get_accessor): + """Test write_to_self_hosted_table writes data correctly.""" + # Create a daily processor (has self_hosted_line_item_model) + processor = AWSReportParquetProcessor( + self.manifest_id, self.account, "/s3/path/daily", self.aws_provider_uuid, self.start_date + ) + + data_frame = pd.DataFrame( + {"col1": [1, 2], "lineitem_usagestartdate": pd.to_datetime(["2024-01-15", "2024-01-15"])} + ) + metadata = {} + + with patch("pandas.DataFrame.to_sql") as mock_to_sql: + processor.write_to_self_hosted_table(data_frame, metadata) + + # Verify columns were added + self.assertIn("manifestid", data_frame.columns) + self.assertIn("year", data_frame.columns) + self.assertIn("month", data_frame.columns) + self.assertIn("source", data_frame.columns) + self.assertIn("usage_start", data_frame.columns) + self.assertIn("id", data_frame.columns) + + # Verify partition was created + mock_partition.assert_called_once() + + # Verify to_sql was called + mock_to_sql.assert_called_once() + + 
@patch("masu.processor.report_parquet_processor_base.get_report_db_accessor") + def test_delete_old_data_postgres(self, mock_get_accessor): + """Test delete_old_data_postgres.""" + mock_conn = mock_get_accessor.return_value.connect.return_value.__enter__.return_value + mock_cursor = mock_conn.cursor.return_value.__enter__.return_value + mock_cursor.fetchone.return_value = None # Table doesn't exist + mock_cursor.rowcount = 0 + + self.processor.delete_old_data_postgres(self.start_date) diff --git a/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py b/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py index e7c8b258eb..febfb6e591 100644 --- a/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py +++ b/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py @@ -140,19 +140,19 @@ def test_get_table_names_for_delete(self): self.assertIn(TRINO_LINE_ITEM_TABLE_DAILY_MAP[self.report_type], table_names) @patch("koku.reportdb_accessor.get_report_db_accessor") - def test_delete_day_postgres(self, _): - """Test delete_day_postgres.""" - self.processor.delete_day_postgres(self.start_date, reportnumhours=24) + def test_delete_old_data_postgres(self, _): + """Test delete_old_data_postgres.""" + self.processor.delete_old_data_postgres(self.start_date, reportnumhours=24) @patch("koku.reportdb_accessor.get_report_db_accessor") - def test_delete_day_postgres_raises_when_data_exists(self, mock_get_accessor): - """Test delete_day_postgres raises exception when data exists.""" + def test_delete_old_data_postgres_raises_when_data_exists(self, mock_get_accessor): + """Test delete_old_data_postgres raises exception when data exists.""" mock_conn = mock_get_accessor.return_value.connect.return_value.__enter__.return_value mock_cursor = mock_conn.cursor.return_value.__enter__.return_value mock_cursor.rowcount = 0 mock_cursor.fetchone.return_value = (1,) with self.assertRaises(Exception): - self.processor.delete_day_postgres(self.start_date, 
reportnumhours=24) + self.processor.delete_old_data_postgres(self.start_date, reportnumhours=24) def test_is_daily_flag(self): """Test that _is_daily is set correctly based on s3_path.""" @@ -188,7 +188,7 @@ def test_self_hosted_line_item_model(self): @patch( "masu.processor.ocp.ocp_report_parquet_processor.OCPReportParquetProcessor.get_or_create_postgres_partition" ) - @patch("koku.reportdb_accessor.get_report_db_accessor") + @patch("masu.processor.report_parquet_processor_base.get_report_db_accessor") def test_write_to_self_hosted_table(self, mock_get_accessor, mock_partition): """Test write_to_self_hosted_table writes data correctly.""" # Create a daily processor (has self_hosted_line_item_model) diff --git a/koku/masu/test/processor/parquet/test_parquet_report_processor.py b/koku/masu/test/processor/parquet/test_parquet_report_processor.py index 42d1a0b19e..450df05df0 100644 --- a/koku/masu/test/processor/parquet/test_parquet_report_processor.py +++ b/koku/masu/test/processor/parquet/test_parquet_report_processor.py @@ -914,7 +914,7 @@ def test_write_dataframe(self, mock_get_processor): @override_settings(ONPREM=True) @patch.object(ParquetReportProcessor, "_get_report_processor") def test_delete_old_data_postgres(self, mock_get_processor): - """Test _delete_old_data_postgres calls processor.delete_day_postgres.""" + """Test _delete_old_data_postgres calls processor.delete_old_data_postgres.""" mock_processor = MagicMock() mock_get_processor.return_value = mock_processor @@ -938,4 +938,4 @@ def test_delete_old_data_postgres(self, mock_get_processor): ) report_processor._delete_old_data_postgres(filename) - mock_processor.delete_day_postgres.assert_called() + mock_processor.delete_old_data_postgres.assert_called() diff --git a/koku/masu/test/processor/test_report_parquet_processor_base.py b/koku/masu/test/processor/test_report_parquet_processor_base.py index 9e1eb69dec..e212f21484 100644 --- a/koku/masu/test/processor/test_report_parquet_processor_base.py +++ 
b/koku/masu/test/processor/test_report_parquet_processor_base.py @@ -193,3 +193,26 @@ def test_create_schema(self, mock_execute): with self.assertLogs(self.log_base, level="INFO") as logger: self.processor.create_schema() self.assertIn(expected_log, logger.output) + + def test_self_hosted_line_item_model_base_returns_none(self): + """Test that self_hosted_line_item_model returns None in base class.""" + self.assertIsNone(self.processor.self_hosted_line_item_model) + + def test_prepare_dataframe_for_write_raises_not_implemented(self): + """Test that _prepare_dataframe_for_write raises NotImplementedError in base class.""" + data_frame = pd.DataFrame({"col1": [1, 2]}) + with self.assertRaises(NotImplementedError): + self.processor._prepare_dataframe_for_write(data_frame, {}) + + def test_write_to_self_hosted_table_no_model_raises(self): + """Test write_to_self_hosted_table raises when no model exists.""" + data_frame = pd.DataFrame({"col1": [1, 2]}) + metadata = {} + + with self.assertRaises(NotImplementedError): + self.processor.write_to_self_hosted_table(data_frame, metadata) + + def test_get_table_names_for_delete_default(self): + """Test that get_table_names_for_delete returns the table name.""" + table_names = self.processor.get_table_names_for_delete() + self.assertEqual(table_names, [self.table_name]) diff --git a/koku/masu/test/util/aws/test_common.py b/koku/masu/test/util/aws/test_common.py index ddd334d2e9..0c174e02b8 100644 --- a/koku/masu/test/util/aws/test_common.py +++ b/koku/masu/test/util/aws/test_common.py @@ -688,6 +688,144 @@ def test_get_or_clear_daily_s3_by_date(self, mock_resource): ) self.assertEqual(result, start_date) + @patch("masu.util.aws.common.get_s3_resource") + @patch("masu.util.aws.common.settings") + def test_get_or_clear_daily_s3_by_date_onprem(self, mock_settings, mock_resource): + """Test that onprem skips parquet deletion and only clears CSV files.""" + mock_settings.ONPREM = True + mock_settings.S3_ACCESS_KEY = "fake" + 
mock_settings.S3_SECRET = "fake" + mock_settings.S3_REGION = "us-east-1" + mock_settings.S3_BUCKET_NAME = "fake-bucket" + mock_settings.S3_ENDPOINT = None + mock_settings.S3_TIMEOUT = 5 + start_date = self.dh.this_month_start.replace(year=2019, month=7, day=1).date() + end_date = self.dh.this_month_start.replace(year=2019, month=7, day=2).date() + with patch( + "masu.database.report_manifest_db_accessor.ReportManifestDBAccessor.get_manifest_daily_start_date", + return_value=None, + ): + with patch("masu.util.aws.common.clear_s3_files") as mock_clear_s3: + with patch("masu.util.aws.common._clear_csv_only") as mock_clear_csv: + with patch("masu.util.aws.common._delete_old_data_postgres_by_date"): + result = utils.get_or_clear_daily_s3_by_date( + "None", + "provider_uuid", + start_date, + end_date, + 1, + {"account": "test", "provider_type": "AWS"}, + "request_id", + ) + self.assertEqual(result, start_date) + mock_clear_s3.assert_not_called() + mock_clear_csv.assert_called_once() + + def test_get_table_names_for_delete_aws(self): + """Test get_table_names_for_delete returns correct tables for AWS.""" + from reporting.provider.aws.models import TRINO_LINE_ITEM_DAILY_TABLE + from reporting.provider.aws.models import TRINO_LINE_ITEM_TABLE + from reporting.provider.aws.models import TRINO_OCP_ON_AWS_DAILY_TABLE + + result = utils.get_table_names_for_delete("AWS") + self.assertEqual(result, [TRINO_LINE_ITEM_TABLE, TRINO_LINE_ITEM_DAILY_TABLE, TRINO_OCP_ON_AWS_DAILY_TABLE]) + + def test_get_table_names_for_delete_aws_local(self): + """Test get_table_names_for_delete strips -local suffix for AWS.""" + result_local = utils.get_table_names_for_delete("AWS-local") + result_plain = utils.get_table_names_for_delete("AWS") + self.assertEqual(result_local, result_plain) + + def test_get_table_names_for_delete_azure(self): + """Test get_table_names_for_delete returns correct tables for Azure.""" + from reporting.provider.azure.models import TRINO_LINE_ITEM_TABLE + from 
reporting.provider.azure.models import TRINO_OCP_ON_AZURE_DAILY_TABLE + + result = utils.get_table_names_for_delete("Azure") + self.assertEqual(result, [TRINO_LINE_ITEM_TABLE, TRINO_OCP_ON_AZURE_DAILY_TABLE]) + + def test_get_table_names_for_delete_unknown(self): + """Test get_table_names_for_delete returns empty list for unknown provider.""" + result = utils.get_table_names_for_delete("GCP") + self.assertEqual(result, []) + + @patch("koku.reportdb_accessor.get_report_db_accessor") + def test_delete_old_data_postgres_by_date(self, mock_get_accessor): + """Test _delete_old_data_postgres_by_date calls correct SQL methods.""" + mock_accessor = Mock() + mock_get_accessor.return_value = mock_accessor + + mock_cursor = Mock() + mock_cursor.rowcount = 5 + mock_cursor.__enter__ = Mock(return_value=mock_cursor) + mock_cursor.__exit__ = Mock(return_value=False) + + mock_conn = Mock() + mock_conn.cursor.return_value = mock_cursor + mock_conn.__enter__ = Mock(return_value=mock_conn) + mock_conn.__exit__ = Mock(return_value=False) + + # First call checks table existence (returns True), second call runs delete + mock_accessor.connect.return_value = mock_conn + mock_cursor.fetchone.return_value = (1,) + + mock_accessor.get_table_check_sql.return_value = "CHECK SQL" + mock_accessor.get_delete_by_manifestid_and_date_sql.return_value = "DELETE SQL" + + utils._delete_old_data_postgres_by_date( + "test_schema", "provider-uuid", "AWS", "2025", "03", "manifest-1", "2025-03-05" + ) + + # Should be called for each AWS table (3 tables) + self.assertEqual(mock_accessor.get_table_check_sql.call_count, 3) + self.assertEqual(mock_accessor.get_delete_by_manifestid_and_date_sql.call_count, 3) + + @patch("koku.reportdb_accessor.get_report_db_accessor") + def test_delete_old_data_postgres_by_date_table_not_exists(self, mock_get_accessor): + """Test _delete_old_data_postgres_by_date skips tables that don't exist.""" + mock_accessor = Mock() + mock_get_accessor.return_value = mock_accessor + + 
mock_cursor = Mock() + mock_cursor.__enter__ = Mock(return_value=mock_cursor) + mock_cursor.__exit__ = Mock(return_value=False) + + mock_conn = Mock() + mock_conn.cursor.return_value = mock_cursor + mock_conn.__enter__ = Mock(return_value=mock_conn) + mock_conn.__exit__ = Mock(return_value=False) + + mock_accessor.connect.return_value = mock_conn + mock_cursor.fetchone.return_value = None # Table doesn't exist + + mock_accessor.get_table_check_sql.return_value = "CHECK SQL" + + utils._delete_old_data_postgres_by_date( + "test_schema", "provider-uuid", "AWS", "2025", "03", "manifest-1", "2025-03-05" + ) + + # Check SQL called for each table, but delete never called + self.assertEqual(mock_accessor.get_table_check_sql.call_count, 3) + mock_accessor.get_delete_by_manifestid_and_date_sql.assert_not_called() + + @patch("masu.util.aws.common.get_s3_resource") + def test_clear_csv_only(self, mock_resource): + """Test _clear_csv_only deletes CSV files and marks CSV cleared.""" + manifest_accessor = Mock() + manifest = Mock() + with patch("masu.util.aws.common.get_s3_objects_not_matching_metadata", return_value=["key1"]) as mock_get: + with patch("masu.util.aws.common.delete_s3_objects") as mock_delete: + utils._clear_csv_only("csv_path", 1, manifest, manifest_accessor, {"account": "test"}, "request_id") + mock_get.assert_called_once_with( + "request_id", + "csv_path", + metadata_key="manifestid", + metadata_value_check="1", + context={"account": "test"}, + ) + mock_delete.assert_called_once_with("request_id", ["key1"], {"account": "test"}) + manifest_accessor.mark_s3_csv_cleared.assert_called_once_with(manifest) + class AwsArnTest(TestCase): """AwnArn class test case.""" diff --git a/koku/masu/util/aws/common.py b/koku/masu/util/aws/common.py index 4f43250dec..b15c0cc735 100644 --- a/koku/masu/util/aws/common.py +++ b/koku/masu/util/aws/common.py @@ -562,6 +562,79 @@ def _get_s3_objects(s3_path): return 
s3_resource.Bucket(settings.S3_BUCKET_NAME).objects.filter(Prefix=s3_path) +def get_table_names_for_delete(provider_type): + """Return table names for on-prem postgres deletion, by provider type. + + Used by both the downloader (date-scoped delete) and processor (full-month delete). + """ + from reporting.provider.aws.models import TRINO_LINE_ITEM_DAILY_TABLE as AWS_DAILY + from reporting.provider.aws.models import TRINO_LINE_ITEM_TABLE as AWS_RAW + from reporting.provider.aws.models import TRINO_OCP_ON_AWS_DAILY_TABLE as AWS_OCP + from reporting.provider.azure.models import TRINO_LINE_ITEM_TABLE as AZURE_RAW + from reporting.provider.azure.models import TRINO_OCP_ON_AZURE_DAILY_TABLE as AZURE_OCP + + provider_type_stripped = provider_type.replace("-local", "") + if provider_type_stripped == Provider.PROVIDER_AWS: + return [AWS_RAW, AWS_DAILY, AWS_OCP] + elif provider_type_stripped == Provider.PROVIDER_AZURE: + return [AZURE_RAW, AZURE_OCP] + return [] + + +def _delete_old_data_postgres_by_date( + schema_name, provider_uuid, provider_type, year, month, manifest_id, processing_date +): + """Delete postgres rows for dates >= processing_date with a different manifestid. + + This is the on-prem equivalent of Trino's clear_s3_files which deletes + S3 parquet files from processing_date onwards. It ensures that only data + being reprocessed is deleted, preserving earlier days in the month. 
+ """ + from koku.reportdb_accessor import get_report_db_accessor + + db_accessor = get_report_db_accessor() + table_names = get_table_names_for_delete(provider_type) + + total_deleted = 0 + for table_name in table_names: + check_table_sql = db_accessor.get_table_check_sql(table_name, schema_name) + with db_accessor.connect() as conn: + with conn.cursor() as cursor: + cursor.execute(check_table_sql) + if not cursor.fetchone(): + continue + + delete_sql = db_accessor.get_delete_by_manifestid_and_date_sql( + schema_name, table_name, str(provider_uuid), year, month, str(manifest_id), str(processing_date) + ) + with db_accessor.connect(schema=schema_name) as conn: + with conn.cursor() as cursor: + cursor.execute(delete_sql) + total_deleted += cursor.rowcount + + LOG.info( + log_json( + msg="deleted old data from postgres (date-scoped)", + deleted_rows=total_deleted, + processing_date=str(processing_date), + schema=schema_name, + ) + ) + + +def _clear_csv_only(csv_s3_path, manifest_id, manifest, manifest_accessor, context, request_id): + """Delete only CSV files from S3 and mark CSV cleared, leaving parquet for later processing.""" + to_delete = get_s3_objects_not_matching_metadata( + request_id, + csv_s3_path, + metadata_key="manifestid", + metadata_value_check=str(manifest_id), + context=context, + ) + delete_s3_objects(request_id, to_delete, context) + manifest_accessor.mark_s3_csv_cleared(manifest) + + def get_or_clear_daily_s3_by_date(csv_s3_path, provider_uuid, start_date, end_date, manifest_id, context, request_id): """ Fetches latest processed date based on daily csv files and clears relevant s3 files @@ -572,8 +645,25 @@ def get_or_clear_daily_s3_by_date(csv_s3_path, provider_uuid, start_date, end_da processing_date = manifest_accessor.get_manifest_daily_start_date(manifest_id) if processing_date: if not manifest_accessor.get_s3_parquet_cleared(manifest): - # Prevent other works running trino queries until all files are removed. 
- clear_s3_files(csv_s3_path, provider_uuid, processing_date, "manifestid", manifest_id, context, request_id) + if settings.ONPREM: + _clear_csv_only(csv_s3_path, manifest_id, manifest, manifest_accessor, context, request_id) + year = str(processing_date.year) + month = str(processing_date.month).zfill(2) + _delete_old_data_postgres_by_date( + context.get("account"), + provider_uuid, + context.get("provider_type"), + year, + month, + manifest_id, + processing_date, + ) + manifest_accessor.mark_s3_parquet_cleared(manifest) + else: + # Prevent other workers running trino queries until all files are removed. + clear_s3_files( + csv_s3_path, provider_uuid, processing_date, "manifestid", manifest_id, context, request_id + ) return processing_date processing_date = start_date try: @@ -592,8 +682,23 @@ def get_or_clear_daily_s3_by_date(csv_s3_path, provider_uuid, start_date, end_da ) # Set processing date for all workers processing_date = manifest_accessor.set_manifest_daily_start_date(manifest_id, processing_date) - # Try to clear s3 files for dates. Small edge case, we may have parquet files even without csvs - clear_s3_files(csv_s3_path, provider_uuid, processing_date, "manifestid", manifest_id, context, request_id) + if settings.ONPREM: + _clear_csv_only(csv_s3_path, manifest_id, manifest, manifest_accessor, context, request_id) + year = str(processing_date.year) + month = str(processing_date.month).zfill(2) + _delete_old_data_postgres_by_date( + context.get("account"), + provider_uuid, + context.get("provider_type"), + year, + month, + manifest_id, + processing_date, + ) + manifest_accessor.mark_s3_parquet_cleared(manifest) + else: + # Try to clear s3 files for dates. 
Small edge case, we may have parquet files even without csvs + clear_s3_files(csv_s3_path, provider_uuid, processing_date, "manifestid", manifest_id, context, request_id) except (EndpointConnectionError, ClientError, AttributeError, ValueError): msg = ( "unable to fetch date from objects, " diff --git a/koku/reporting/migrations/0351_awslineitem_awslineitemdaily_and_more.py b/koku/reporting/migrations/0351_awslineitem_awslineitemdaily_and_more.py new file mode 100644 index 0000000000..f22b039ec7 --- /dev/null +++ b/koku/reporting/migrations/0351_awslineitem_awslineitemdaily_and_more.py @@ -0,0 +1,425 @@ +# Generated by Django 5.2.11 on 2026-05-11 05:54 +import uuid + +from django.db import migrations +from django.db import models + +from koku.database import set_pg_extended_mode +from koku.database import unset_pg_extended_mode + + +class Migration(migrations.Migration): + + dependencies = [ + ("reporting", "0350_widen_ratestousage_label_hash"), + ] + + operations = [ + migrations.RunPython(code=set_pg_extended_mode, reverse_code=unset_pg_extended_mode), + migrations.CreateModel( + name="AWSLineItem", + fields=[ + ( + "id", + models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False), + ), + ("usage_start", models.DateField(db_index=True, null=True)), + ("source", models.CharField(db_index=True, max_length=64, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("manifestid", models.CharField(max_length=256, null=True)), + ("bill_payeraccountid", models.CharField(max_length=50, null=True)), + ("bill_billingentity", models.CharField(max_length=50, null=True)), + ("bill_billingperiodstartdate", models.DateTimeField(null=True)), + ("bill_billingperiodenddate", models.DateTimeField(null=True)), + ("bill_billtype", models.TextField(null=True)), + ("bill_invoiceid", models.TextField(null=True)), + ("identity_timeinterval", models.TextField(null=True)), + ( + "lineitem_usagestartdate", 
+ models.DateTimeField(db_index=True, null=True), + ), + ("lineitem_usageenddate", models.DateTimeField(null=True)), + ("lineitem_productcode", models.CharField(max_length=256, null=True)), + ("lineitem_usageaccountid", models.CharField(max_length=50, null=True)), + ( + "lineitem_availabilityzone", + models.CharField(max_length=50, null=True), + ), + ("lineitem_resourceid", models.CharField(max_length=256, null=True)), + ("lineitem_lineitemtype", models.CharField(max_length=50, null=True)), + ("lineitem_lineitemdescription", models.TextField(null=True)), + ("lineitem_legalentity", models.CharField(max_length=256, null=True)), + ("lineitem_usagetype", models.TextField(null=True)), + ("lineitem_operation", models.TextField(null=True)), + ("lineitem_taxtype", models.TextField(null=True)), + ("lineitem_usageamount", models.FloatField(null=True)), + ("lineitem_normalizationfactor", models.FloatField(null=True)), + ("lineitem_normalizedusageamount", models.FloatField(null=True)), + ("lineitem_currencycode", models.CharField(max_length=10, null=True)), + ("lineitem_unblendedrate", models.FloatField(null=True)), + ("lineitem_unblendedcost", models.FloatField(null=True)), + ("lineitem_blendedrate", models.FloatField(null=True)), + ("lineitem_blendedcost", models.FloatField(null=True)), + ("product_productfamily", models.CharField(max_length=150, null=True)), + ("product_region", models.CharField(max_length=50, null=True)), + ("product_instancetype", models.CharField(max_length=50, null=True)), + ("product_productname", models.CharField(max_length=256, null=True)), + ("product_physicalcores", models.CharField(max_length=50, null=True)), + ("product_vcpu", models.CharField(max_length=50, null=True)), + ("product_memory", models.TextField(null=True)), + ("product_operatingsystem", models.TextField(null=True)), + ("product_servicecode", models.TextField(null=True)), + ("product_sku", models.TextField(null=True)), + ("pricing_unit", models.TextField(null=True)), + 
("pricing_publicondemandcost", models.FloatField(null=True)), + ("pricing_publicondemandrate", models.FloatField(null=True)), + ("pricing_term", models.TextField(null=True)), + ("savingsplan_savingsplaneffectivecost", models.FloatField(null=True)), + ( + "reservation_amortizedupfrontcostforusage", + models.TextField(null=True), + ), + ( + "reservation_amortizedupfrontfeeforbillingperiod", + models.TextField(null=True), + ), + ("reservation_endtime", models.TextField(null=True)), + ("reservation_numberofreservations", models.TextField(null=True)), + ("reservation_recurringfeeforusage", models.TextField(null=True)), + ("reservation_starttime", models.TextField(null=True)), + ("reservation_unitsperreservation", models.TextField(null=True)), + ("reservation_unusedquantity", models.TextField(null=True)), + ("reservation_unusedrecurringfee", models.TextField(null=True)), + ("resourcetags", models.TextField(null=True)), + ("costcategory", models.TextField(null=True)), + ("row_uuid", models.TextField(null=True)), + ], + options={ + "db_table": "aws_line_items", + "indexes": [models.Index(fields=["source", "year", "month"], name="aws_li_src_yr_mo_idx")], + }, + ), + migrations.CreateModel( + name="AWSLineItemDaily", + fields=[ + ( + "id", + models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False), + ), + ("usage_start", models.DateField(db_index=True, null=True)), + ("source", models.CharField(db_index=True, max_length=64, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("manifestid", models.CharField(max_length=256, null=True)), + ("bill_payeraccountid", models.CharField(max_length=50, null=True)), + ("bill_billingentity", models.CharField(max_length=50, null=True)), + ("bill_billingperiodstartdate", models.DateTimeField(null=True)), + ("bill_billingperiodenddate", models.DateTimeField(null=True)), + ("bill_billtype", models.TextField(null=True)), + ("bill_invoiceid", 
models.TextField(null=True)), + ("identity_timeinterval", models.TextField(null=True)), + ( + "lineitem_usagestartdate", + models.DateTimeField(db_index=True, null=True), + ), + ("lineitem_usageenddate", models.DateTimeField(null=True)), + ("lineitem_productcode", models.CharField(max_length=256, null=True)), + ("lineitem_usageaccountid", models.CharField(max_length=50, null=True)), + ( + "lineitem_availabilityzone", + models.CharField(max_length=50, null=True), + ), + ("lineitem_resourceid", models.CharField(max_length=256, null=True)), + ("lineitem_lineitemtype", models.CharField(max_length=50, null=True)), + ("lineitem_lineitemdescription", models.TextField(null=True)), + ("lineitem_legalentity", models.CharField(max_length=256, null=True)), + ("lineitem_usagetype", models.TextField(null=True)), + ("lineitem_operation", models.TextField(null=True)), + ("lineitem_taxtype", models.TextField(null=True)), + ("lineitem_usageamount", models.FloatField(null=True)), + ("lineitem_normalizationfactor", models.FloatField(null=True)), + ("lineitem_normalizedusageamount", models.FloatField(null=True)), + ("lineitem_currencycode", models.CharField(max_length=10, null=True)), + ("lineitem_unblendedrate", models.FloatField(null=True)), + ("lineitem_unblendedcost", models.FloatField(null=True)), + ("lineitem_blendedrate", models.FloatField(null=True)), + ("lineitem_blendedcost", models.FloatField(null=True)), + ("product_productfamily", models.CharField(max_length=150, null=True)), + ("product_region", models.CharField(max_length=50, null=True)), + ("product_instancetype", models.CharField(max_length=50, null=True)), + ("product_productname", models.CharField(max_length=256, null=True)), + ("product_physicalcores", models.CharField(max_length=50, null=True)), + ("product_vcpu", models.CharField(max_length=50, null=True)), + ("product_memory", models.TextField(null=True)), + ("product_operatingsystem", models.TextField(null=True)), + ("product_servicecode", 
models.TextField(null=True)), + ("product_sku", models.TextField(null=True)), + ("pricing_unit", models.TextField(null=True)), + ("pricing_publicondemandcost", models.FloatField(null=True)), + ("pricing_publicondemandrate", models.FloatField(null=True)), + ("pricing_term", models.TextField(null=True)), + ("savingsplan_savingsplaneffectivecost", models.FloatField(null=True)), + ( + "reservation_amortizedupfrontcostforusage", + models.TextField(null=True), + ), + ( + "reservation_amortizedupfrontfeeforbillingperiod", + models.TextField(null=True), + ), + ("reservation_endtime", models.TextField(null=True)), + ("reservation_numberofreservations", models.TextField(null=True)), + ("reservation_recurringfeeforusage", models.TextField(null=True)), + ("reservation_starttime", models.TextField(null=True)), + ("reservation_unitsperreservation", models.TextField(null=True)), + ("reservation_unusedquantity", models.TextField(null=True)), + ("reservation_unusedrecurringfee", models.TextField(null=True)), + ("resourcetags", models.TextField(null=True)), + ("costcategory", models.TextField(null=True)), + ("row_uuid", models.TextField(null=True)), + ], + options={ + "db_table": "aws_line_items_daily", + "indexes": [ + models.Index( + fields=["source", "year", "month"], + name="aws_li_daily_src_yr_mo_idx", + ) + ], + }, + ), + migrations.CreateModel( + name="ManagedAWSOpenShiftDaily", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("row_uuid", models.CharField(max_length=256, null=True)), + ("resource_id", models.CharField(max_length=256, null=True)), + ("product_code", models.CharField(max_length=256, null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("usage_account_id", models.CharField(max_length=256, null=True)), + ("availability_zone", models.CharField(max_length=256, null=True)), + ("product_family", models.CharField(max_length=256, null=True)), + ("instance_type", 
models.CharField(max_length=256, null=True)), + ("region", models.CharField(max_length=256, null=True)), + ("unit", models.CharField(max_length=256, null=True)), + ("tags", models.TextField(null=True)), + ("aws_cost_category", models.TextField(null=True)), + ( + "data_transfer_direction", + models.CharField(max_length=256, null=True), + ), + ("usage_amount", models.FloatField(null=True)), + ("currency_code", models.CharField(max_length=256, null=True)), + ("unblended_cost", models.FloatField(null=True)), + ("blended_cost", models.FloatField(null=True)), + ("savingsplan_effective_cost", models.FloatField(null=True)), + ("calculated_amortized_cost", models.FloatField(null=True)), + ("resource_id_matched", models.BooleanField(null=True)), + ("matched_tag", models.CharField(max_length=256, null=True)), + ("source", models.CharField(max_length=256, null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("day", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": "managed_aws_openshift_daily_temp", + "indexes": [ + models.Index( + fields=["source", "ocp_source", "year", "month"], + name="aws_daily_tmp_src_yr_mo_idx", + ), + models.Index(fields=["day"], name="aws_daily_tmp_day_idx"), + ], + }, + ), + migrations.CreateModel( + name="ManagedAWSOpenShiftDiskCapacities", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("resource_id", models.CharField(max_length=256, null=True)), + ("capacity", models.IntegerField(null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": "managed_aws_openshift_disk_capacities_temp", + "indexes": 
[ + models.Index( + fields=["ocp_source", "year", "month"], + name="aws_disk_cap_src_yr_mo_idx", + ) + ], + }, + ), + migrations.CreateModel( + name="ManagedOCPAWSCostLineItemProjectDailySummary", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("row_uuid", models.CharField(max_length=256, null=True)), + ("cluster_id", models.CharField(max_length=256, null=True)), + ("cluster_alias", models.CharField(max_length=256, null=True)), + ("data_source", models.CharField(max_length=256, null=True)), + ("namespace", models.CharField(max_length=256, null=True)), + ("node", models.CharField(max_length=256, null=True)), + ("persistentvolumeclaim", models.CharField(max_length=256, null=True)), + ("persistentvolume", models.CharField(max_length=256, null=True)), + ("storageclass", models.CharField(max_length=256, null=True)), + ("resource_id", models.CharField(max_length=256, null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("usage_end", models.DateTimeField(null=True)), + ("product_code", models.CharField(max_length=256, null=True)), + ("product_family", models.CharField(max_length=256, null=True)), + ("instance_type", models.CharField(max_length=256, null=True)), + ("usage_account_id", models.CharField(max_length=256, null=True)), + ("account_alias_id", models.IntegerField(null=True)), + ("availability_zone", models.CharField(max_length=256, null=True)), + ("region", models.CharField(max_length=256, null=True)), + ("unit", models.CharField(max_length=256, null=True)), + ("usage_amount", models.FloatField(null=True)), + ( + "data_transfer_direction", + models.CharField(max_length=256, null=True), + ), + ("currency_code", models.CharField(max_length=256, null=True)), + ("unblended_cost", models.FloatField(null=True)), + ("markup_cost", models.FloatField(null=True)), + ("blended_cost", models.FloatField(null=True)), + ("markup_cost_blended", models.FloatField(null=True)), + 
("savingsplan_effective_cost", models.FloatField(null=True)), + ("markup_cost_savingsplan", models.FloatField(null=True)), + ("calculated_amortized_cost", models.FloatField(null=True)), + ("markup_cost_amortized", models.FloatField(null=True)), + ("pod_cost", models.FloatField(null=True)), + ("project_markup_cost", models.FloatField(null=True)), + ("pod_labels", models.TextField(null=True)), + ("tags", models.TextField(null=True)), + ("aws_cost_category", models.TextField(null=True)), + ("cost_category_id", models.IntegerField(null=True)), + ("project_rank", models.IntegerField(null=True)), + ("data_source_rank", models.IntegerField(null=True)), + ("resource_id_matched", models.BooleanField(null=True)), + ("matched_tag", models.CharField(max_length=256, null=True)), + ("source", models.CharField(max_length=256, null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("day", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": "managed_reporting_ocpawscostlineitem_project_daily_summary", + "indexes": [ + models.Index( + fields=["source", "ocp_source", "year", "month"], + name="aws_summ_src_yr_mo_idx", + ), + models.Index(fields=["day"], name="aws_summ_day_idx"), + models.Index(fields=["usage_start"], name="aws_summ_usage_start_idx"), + ], + }, + ), + migrations.CreateModel( + name="ManagedOCPAWSCostLineItemProjectDailySummaryTemp", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("row_uuid", models.CharField(max_length=256, null=True)), + ("cluster_id", models.CharField(max_length=256, null=True)), + ("cluster_alias", models.CharField(max_length=256, null=True)), + ("data_source", models.CharField(max_length=256, null=True)), + ("namespace", models.CharField(max_length=256, null=True)), + ("node", 
models.CharField(max_length=256, null=True)), + ("persistentvolumeclaim", models.CharField(max_length=256, null=True)), + ("persistentvolume", models.CharField(max_length=256, null=True)), + ("storageclass", models.CharField(max_length=256, null=True)), + ("resource_id", models.CharField(max_length=256, null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("usage_end", models.DateTimeField(null=True)), + ("product_code", models.CharField(max_length=256, null=True)), + ("product_family", models.CharField(max_length=256, null=True)), + ("instance_type", models.CharField(max_length=256, null=True)), + ("usage_account_id", models.CharField(max_length=256, null=True)), + ("availability_zone", models.CharField(max_length=256, null=True)), + ("region", models.CharField(max_length=256, null=True)), + ("unit", models.CharField(max_length=256, null=True)), + ("usage_amount", models.FloatField(null=True)), + ("currency_code", models.CharField(max_length=256, null=True)), + ("unblended_cost", models.FloatField(null=True)), + ("markup_cost", models.FloatField(null=True)), + ("blended_cost", models.FloatField(null=True)), + ("markup_cost_blended", models.FloatField(null=True)), + ("savingsplan_effective_cost", models.FloatField(null=True)), + ("markup_cost_savingsplan", models.FloatField(null=True)), + ("calculated_amortized_cost", models.FloatField(null=True)), + ("markup_cost_amortized", models.FloatField(null=True)), + ("pod_cost", models.FloatField(null=True)), + ("project_markup_cost", models.FloatField(null=True)), + ("pod_effective_usage_cpu_core_hours", models.FloatField(null=True)), + ( + "pod_effective_usage_memory_gigabyte_hours", + models.FloatField(null=True), + ), + ("node_capacity_cpu_core_hours", models.FloatField(null=True)), + ("node_capacity_memory_gigabyte_hours", models.FloatField(null=True)), + ("pod_labels", models.TextField(null=True)), + ("volume_labels", models.TextField(null=True)), + ("tags", models.TextField(null=True)), + 
("aws_cost_category", models.TextField(null=True)), + ("cost_category_id", models.IntegerField(null=True)), + ("project_rank", models.IntegerField(null=True)), + ("data_source_rank", models.IntegerField(null=True)), + ("resource_id_matched", models.BooleanField(null=True)), + ("matched_tag", models.CharField(max_length=256, null=True)), + ("source", models.CharField(max_length=256, null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("day", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": "managed_reporting_ocpawscostlineitem_project_daily_summary_temp", + "indexes": [ + models.Index( + fields=["source", "ocp_source", "year", "month"], + name="aws_summ_tmp_src_yr_mo_idx", + ), + models.Index(fields=["day"], name="aws_summ_tmp_day_idx"), + ], + }, + ), + migrations.RunPython(code=unset_pg_extended_mode, reverse_code=set_pg_extended_mode), + ] diff --git a/koku/reporting/provider/aws/models.py b/koku/reporting/provider/aws/models.py index 8c709be77c..01a982848c 100644 --- a/koku/reporting/provider/aws/models.py +++ b/koku/reporting/provider/aws/models.py @@ -761,3 +761,16 @@ class Meta: source_uuid = models.ForeignKey( "reporting.TenantAPIProvider", on_delete=models.CASCADE, unique=False, null=True, db_column="source_uuid" ) + + +# Import self-hosted models to register them with Django (required for partition table creation) +from reporting.provider.aws.self_hosted_models import AWSLineItem # noqa: E402, F401 +from reporting.provider.aws.self_hosted_models import AWSLineItemDaily # noqa: E402, F401 +from reporting.provider.aws.openshift.self_hosted_models import ManagedAWSOpenShiftDaily # noqa: E402, F401 +from reporting.provider.aws.openshift.self_hosted_models import ManagedAWSOpenShiftDiskCapacities # noqa: E402, F401 +from reporting.provider.aws.openshift.self_hosted_models import ( # noqa: E402, F401 + 
ManagedOCPAWSCostLineItemProjectDailySummary, +) +from reporting.provider.aws.openshift.self_hosted_models import ( # noqa: E402, F401 + ManagedOCPAWSCostLineItemProjectDailySummaryTemp, +) diff --git a/koku/reporting/provider/aws/openshift/self_hosted_models.py b/koku/reporting/provider/aws/openshift/self_hosted_models.py new file mode 100644 index 0000000000..2bcecf5e42 --- /dev/null +++ b/koku/reporting/provider/aws/openshift/self_hosted_models.py @@ -0,0 +1,173 @@ +# +# Copyright 2025 Red Hat Inc. +# SPDX-License-Identifier: Apache-2.0 +# +"""Django models for AWS OpenShift managed tables (on-prem PostgreSQL).""" +from django.db import models + + +class ManagedAWSOpenShiftDaily(models.Model): + """Model for the managed_aws_openshift_daily_temp table. + + Rows carry AWS usage/cost columns plus the resource_id_matched and matched_tag columns, + keyed by source, ocp_source, year, month and day. + """ + + class Meta: + db_table = "managed_aws_openshift_daily_temp" + indexes = [ + models.Index(fields=["source", "ocp_source", "year", "month"], name="aws_daily_tmp_src_yr_mo_idx"), + models.Index(fields=["day"], name="aws_daily_tmp_day_idx"), + ] + + row_uuid = models.CharField(max_length=256, null=True) + resource_id = models.CharField(max_length=256, null=True) + product_code = models.CharField(max_length=256, null=True) + usage_start = models.DateTimeField(null=True) + usage_account_id = models.CharField(max_length=256, null=True) + availability_zone = models.CharField(max_length=256, null=True) + product_family = models.CharField(max_length=256, null=True) + instance_type = models.CharField(max_length=256, null=True) + region = models.CharField(max_length=256, null=True) + unit = models.CharField(max_length=256, null=True) + tags = models.TextField(null=True) + aws_cost_category = models.TextField(null=True) + data_transfer_direction = models.CharField(max_length=256, null=True) + usage_amount = models.FloatField(null=True) + currency_code = models.CharField(max_length=256, null=True) + unblended_cost = models.FloatField(null=True) + blended_cost = models.FloatField(null=True) + savingsplan_effective_cost = models.FloatField(null=True) + calculated_amortized_cost = 
models.FloatField(null=True) + resource_id_matched = models.BooleanField(null=True) + matched_tag = models.CharField(max_length=256, null=True) + source = models.CharField(max_length=256, null=True) + ocp_source = models.CharField(max_length=256, null=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + day = models.CharField(max_length=2, null=True) + + +class ManagedOCPAWSCostLineItemProjectDailySummaryTemp(models.Model): + """Model for the managed_reporting_ocpawscostlineitem_project_daily_summary_temp table. + + Compared with ManagedOCPAWSCostLineItemProjectDailySummary this table additionally has the + pod_effective_usage_*, node_capacity_* and volume_labels columns, and has no + account_alias_id or data_transfer_direction column. + """ + + class Meta: + db_table = "managed_reporting_ocpawscostlineitem_project_daily_summary_temp" + indexes = [ + models.Index(fields=["source", "ocp_source", "year", "month"], name="aws_summ_tmp_src_yr_mo_idx"), + models.Index(fields=["day"], name="aws_summ_tmp_day_idx"), + ] + + row_uuid = models.CharField(max_length=256, null=True) + cluster_id = models.CharField(max_length=256, null=True) + cluster_alias = models.CharField(max_length=256, null=True) + data_source = models.CharField(max_length=256, null=True) + namespace = models.CharField(max_length=256, null=True) + node = models.CharField(max_length=256, null=True) + persistentvolumeclaim = models.CharField(max_length=256, null=True) + persistentvolume = models.CharField(max_length=256, null=True) + storageclass = models.CharField(max_length=256, null=True) + resource_id = models.CharField(max_length=256, null=True) + usage_start = models.DateTimeField(null=True) + usage_end = models.DateTimeField(null=True) + product_code = models.CharField(max_length=256, null=True) + product_family = models.CharField(max_length=256, null=True) + instance_type = models.CharField(max_length=256, null=True) + usage_account_id = models.CharField(max_length=256, null=True) + availability_zone = models.CharField(max_length=256, null=True) + region = models.CharField(max_length=256, null=True) + unit = models.CharField(max_length=256, null=True) + usage_amount = models.FloatField(null=True) + currency_code = models.CharField(max_length=256, null=True) + 
unblended_cost = models.FloatField(null=True) + markup_cost = models.FloatField(null=True) + blended_cost = models.FloatField(null=True) + markup_cost_blended = models.FloatField(null=True) + savingsplan_effective_cost = models.FloatField(null=True) + markup_cost_savingsplan = models.FloatField(null=True) + calculated_amortized_cost = models.FloatField(null=True) + markup_cost_amortized = models.FloatField(null=True) + pod_cost = models.FloatField(null=True) + project_markup_cost = models.FloatField(null=True) + pod_effective_usage_cpu_core_hours = models.FloatField(null=True) + pod_effective_usage_memory_gigabyte_hours = models.FloatField(null=True) + node_capacity_cpu_core_hours = models.FloatField(null=True) + node_capacity_memory_gigabyte_hours = models.FloatField(null=True) + pod_labels = models.TextField(null=True) + volume_labels = models.TextField(null=True) + tags = models.TextField(null=True) + aws_cost_category = models.TextField(null=True) + cost_category_id = models.IntegerField(null=True) + project_rank = models.IntegerField(null=True) + data_source_rank = models.IntegerField(null=True) + resource_id_matched = models.BooleanField(null=True) + matched_tag = models.CharField(max_length=256, null=True) + source = models.CharField(max_length=256, null=True) + ocp_source = models.CharField(max_length=256, null=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + day = models.CharField(max_length=2, null=True) + + +class ManagedOCPAWSCostLineItemProjectDailySummary(models.Model): + """Model for the managed_reporting_ocpawscostlineitem_project_daily_summary table.""" + + class Meta: + db_table = "managed_reporting_ocpawscostlineitem_project_daily_summary" + indexes = [ + models.Index(fields=["source", "ocp_source", "year", "month"], name="aws_summ_src_yr_mo_idx"), + models.Index(fields=["day"], name="aws_summ_day_idx"), + models.Index(fields=["usage_start"], name="aws_summ_usage_start_idx"), + ] + + row_uuid = models.CharField(max_length=256, null=True) + cluster_id = 
models.CharField(max_length=256, null=True) + cluster_alias = models.CharField(max_length=256, null=True) + data_source = models.CharField(max_length=256, null=True) + namespace = models.CharField(max_length=256, null=True) + node = models.CharField(max_length=256, null=True) + persistentvolumeclaim = models.CharField(max_length=256, null=True) + persistentvolume = models.CharField(max_length=256, null=True) + storageclass = models.CharField(max_length=256, null=True) + resource_id = models.CharField(max_length=256, null=True) + usage_start = models.DateTimeField(null=True) + usage_end = models.DateTimeField(null=True) + product_code = models.CharField(max_length=256, null=True) + product_family = models.CharField(max_length=256, null=True) + instance_type = models.CharField(max_length=256, null=True) + usage_account_id = models.CharField(max_length=256, null=True) + account_alias_id = models.IntegerField(null=True) + availability_zone = models.CharField(max_length=256, null=True) + region = models.CharField(max_length=256, null=True) + unit = models.CharField(max_length=256, null=True) + usage_amount = models.FloatField(null=True) + data_transfer_direction = models.CharField(max_length=256, null=True) + currency_code = models.CharField(max_length=256, null=True) + unblended_cost = models.FloatField(null=True) + markup_cost = models.FloatField(null=True) + blended_cost = models.FloatField(null=True) + markup_cost_blended = models.FloatField(null=True) + savingsplan_effective_cost = models.FloatField(null=True) + markup_cost_savingsplan = models.FloatField(null=True) + calculated_amortized_cost = models.FloatField(null=True) + markup_cost_amortized = models.FloatField(null=True) + pod_cost = models.FloatField(null=True) + project_markup_cost = models.FloatField(null=True) + pod_labels = models.TextField(null=True) + tags = models.TextField(null=True) + aws_cost_category = models.TextField(null=True) + cost_category_id = models.IntegerField(null=True) + project_rank 
= models.IntegerField(null=True) + data_source_rank = models.IntegerField(null=True) + resource_id_matched = models.BooleanField(null=True) + matched_tag = models.CharField(max_length=256, null=True) + source = models.CharField(max_length=256, null=True) + ocp_source = models.CharField(max_length=256, null=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + day = models.CharField(max_length=2, null=True) + + +class ManagedAWSOpenShiftDiskCapacities(models.Model): + """Model for the managed_aws_openshift_disk_capacities_temp table. + + Stores one integer capacity per resource_id and usage_start, keyed by ocp_source, year and month. + """ + + class Meta: + db_table = "managed_aws_openshift_disk_capacities_temp" + indexes = [ + models.Index(fields=["ocp_source", "year", "month"], name="aws_disk_cap_src_yr_mo_idx"), + ] + + resource_id = models.CharField(max_length=256, null=True) + capacity = models.IntegerField(null=True) + usage_start = models.DateTimeField(null=True) + ocp_source = models.CharField(max_length=256, null=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) diff --git a/koku/reporting/provider/aws/self_hosted_models.py b/koku/reporting/provider/aws/self_hosted_models.py new file mode 100644 index 0000000000..23e946c18b --- /dev/null +++ b/koku/reporting/provider/aws/self_hosted_models.py @@ -0,0 +1,167 @@ +# +# Copyright 2025 Red Hat Inc. +# SPDX-License-Identifier: Apache-2.0 +# +"""Django models for AWS line item tables (on-prem PostgreSQL storage).""" +from uuid import uuid4 + +from django.db import models + + +class AWSLineItemBase(models.Model): + """Abstract base class for AWS line item tables. + + These models replace the raw SQL table creation for on-prem PostgreSQL storage. 
+ They provide: + - Django migration support for schema evolution + - Single-column date partitioning (usage_start) for efficient data management + - Integration with existing PartitionedTable infrastructure + """ + + class Meta: + abstract = True + + class PartitionInfo: + partition_type = "RANGE" + partition_cols = ["usage_start"] + + # UUID primary key for compatibility with SQLAlchemy to_sql and partitioned tables + id = models.UUIDField(primary_key=True, default=uuid4) + + # Partition column - DateField for proper range partitioning (matches existing infrastructure) + usage_start = models.DateField(null=True, db_index=True) + + # Partition-related columns (indexed, not partitioned in PostgreSQL) + # source is stored as varchar to match Trino/parquet storage and existing SQL joins + source = models.CharField(max_length=64, null=True, db_index=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + + # Manifest tracking (AWS uses manifestid instead of reportnumhours) + manifestid = models.CharField(max_length=256, null=True) + + # AWS billing columns + bill_payeraccountid = models.CharField(max_length=50, null=True) + bill_billingentity = models.CharField(max_length=50, null=True) + bill_billingperiodstartdate = models.DateTimeField(null=True) + bill_billingperiodenddate = models.DateTimeField(null=True) + bill_billtype = models.TextField(null=True) + bill_invoiceid = models.TextField(null=True) + + # Identity columns + identity_timeinterval = models.TextField(null=True) + + # Line item identification + lineitem_usagestartdate = models.DateTimeField(null=True, db_index=True) + lineitem_usageenddate = models.DateTimeField(null=True) + lineitem_productcode = models.CharField(max_length=256, null=True) + lineitem_usageaccountid = models.CharField(max_length=50, null=True) + lineitem_availabilityzone = models.CharField(max_length=50, null=True) + lineitem_resourceid = models.CharField(max_length=256, null=True) 
+ lineitem_lineitemtype = models.CharField(max_length=50, null=True) + lineitem_lineitemdescription = models.TextField(null=True) + lineitem_legalentity = models.CharField(max_length=256, null=True) + lineitem_usagetype = models.TextField(null=True) + lineitem_operation = models.TextField(null=True) + lineitem_taxtype = models.TextField(null=True) + + # Usage metrics + lineitem_usageamount = models.FloatField(null=True) + lineitem_normalizationfactor = models.FloatField(null=True) + lineitem_normalizedusageamount = models.FloatField(null=True) + + # Cost metrics + lineitem_currencycode = models.CharField(max_length=10, null=True) + lineitem_unblendedrate = models.FloatField(null=True) + lineitem_unblendedcost = models.FloatField(null=True) + lineitem_blendedrate = models.FloatField(null=True) + lineitem_blendedcost = models.FloatField(null=True) + + # Product details + product_productfamily = models.CharField(max_length=150, null=True) + product_region = models.CharField(max_length=50, null=True) + product_instancetype = models.CharField(max_length=50, null=True) + product_productname = models.CharField(max_length=256, null=True) + product_physicalcores = models.CharField(max_length=50, null=True) + product_vcpu = models.CharField(max_length=50, null=True) + product_memory = models.TextField(null=True) + product_operatingsystem = models.TextField(null=True) + product_servicecode = models.TextField(null=True) + product_sku = models.TextField(null=True) + + # Pricing + pricing_unit = models.TextField(null=True) + pricing_publicondemandcost = models.FloatField(null=True) + pricing_publicondemandrate = models.FloatField(null=True) + pricing_term = models.TextField(null=True) + + # Savings plan + savingsplan_savingsplaneffectivecost = models.FloatField(null=True) + + # Reservations + reservation_amortizedupfrontcostforusage = models.TextField(null=True) + reservation_amortizedupfrontfeeforbillingperiod = models.TextField(null=True) + reservation_endtime = 
models.TextField(null=True) + reservation_numberofreservations = models.TextField(null=True) + reservation_recurringfeeforusage = models.TextField(null=True) + reservation_starttime = models.TextField(null=True) + reservation_unitsperreservation = models.TextField(null=True) + reservation_unusedquantity = models.TextField(null=True) + reservation_unusedrecurringfee = models.TextField(null=True) + + # Tags and cost categories (stored as JSON text) + resourcetags = models.TextField(null=True) + costcategory = models.TextField(null=True) + + # Row UUID for daily aggregation tracking + row_uuid = models.TextField(null=True) + + +class AWSLineItem(AWSLineItemBase): + """Model for aws_line_items table (raw hourly data).""" + + class Meta: + db_table = "aws_line_items" + indexes = [ + models.Index(fields=["source", "year", "month"], name="aws_li_src_yr_mo_idx"), + ] + + +class AWSLineItemDaily(AWSLineItemBase): + """Model for aws_line_items_daily table (aggregated daily data).""" + + class Meta: + db_table = "aws_line_items_daily" + indexes = [ + models.Index(fields=["source", "year", "month"], name="aws_li_daily_src_yr_mo_idx"), + ] + + +# Mapping from table type to Django model (for self-hosted/on-prem PostgreSQL) +# AWS only has one "report type" unlike OCP which has pod_usage, storage_usage, etc. +SELF_HOSTED_MODEL_MAP = { + "aws_line_items": AWSLineItem, +} + +SELF_HOSTED_DAILY_MODEL_MAP = { + "aws_line_items": AWSLineItemDaily, +} + + +def get_self_hosted_models(): + """Get all self-hosted models (raw and daily). + + Returns a list of all Django models used for self-hosted/on-prem data storage. + Used for cleanup operations like source deletion and expired data removal. + """ + models_list = list(SELF_HOSTED_MODEL_MAP.values()) + models_list.extend(SELF_HOSTED_DAILY_MODEL_MAP.values()) + return models_list + + +def get_self_hosted_table_names(): + """Get table names for all self-hosted models. 
+ + Returns a list of database table names for partition cleanup operations. + """ + return [model._meta.db_table for model in get_self_hosted_models()] From 4adaecdb4d77957615200cc805bb3d92ac62220d Mon Sep 17 00:00:00 2001 From: Yaron Dayagi Date: Mon, 23 Feb 2026 16:16:26 +0200 Subject: [PATCH 2/2] [FLPATH-3323] Add Azure self-hosted/on-prem support Add self-hosted PostgreSQL support for Azure provider, following the same pattern as AWS. Changes: - Add Django model for Azure line items (azure_line_items) - Add migration for partitioned Azure line item table - Add self_hosted_sql/azure/ directory with PostgreSQL-converted SQL files - Update Azure processor with _date_column, self_hosted_line_item_model - Update Azure db accessor to use get_sql_folder_name() - Add delete_self_hosted_data_by_source() for cleanup Jira: https://issues.redhat.com/browse/FLPATH-3323 Co-Authored-By: Claude Opus 4.5 Signed-off-by: Yaron Dayagi --- .../masu/database/azure_report_db_accessor.py | 43 +- .../1_resource_matching_by_cluster.sql | 169 ++++ .../2_summarize_data_by_cluster.sql | 721 ++++++++++++++++++ ...recostlineitem_project_daily_summary_p.sql | 138 ++++ .../reporting_ocpazure_matched_tags.sql | 65 ++ .../reporting_ocpazure_compute_summary_p.sql | 44 ++ ...ing_ocpazure_cost_summary_by_account_p.sql | 34 + ...ng_ocpazure_cost_summary_by_location_p.sql | 36 + ...ing_ocpazure_cost_summary_by_service_p.sql | 36 + .../reporting_ocpazure_cost_summary_p.sql | 32 + .../reporting_ocpazure_database_summary_p.sql | 44 ++ .../reporting_ocpazure_network_summary_p.sql | 41 + .../reporting_ocpazure_storage_summary_p.sql | 42 + ...recostlineitem_project_daily_summary_p.sql | 91 +++ ...g_azurecostentrylineitem_daily_summary.sql | 94 +++ ...porting_ocpinfrastructure_provider_map.sql | 61 ++ .../azure/azure_report_parquet_processor.py | 28 + .../database/test_azure_report_db_accessor.py | 32 + .../test_azure_report_parquet_processor.py | 91 +++ ...tem_managedazureopenshiftdaily_and_more.py | 348 
+++++++++ koku/reporting/provider/azure/models.py | 13 + .../azure/openshift/self_hosted_models.py | 160 ++++ .../provider/azure/self_hosted_models.py | 181 +++++ 23 files changed, 2535 insertions(+), 9 deletions(-) create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/3_reporting_ocpazurecostlineitem_project_daily_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/reporting_ocpazure_matched_tags.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_compute_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_account_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_location_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_service_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_database_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_network_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_storage_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazurecostlineitem_project_daily_summary_p.sql create mode 100755 koku/masu/database/self_hosted_sql/azure/reporting_azurecostentrylineitem_daily_summary.sql create mode 100755 
koku/masu/database/self_hosted_sql/azure/reporting_ocpinfrastructure_provider_map.sql create mode 100644 koku/reporting/migrations/0352_azurelineitem_managedazureopenshiftdaily_and_more.py create mode 100644 koku/reporting/provider/azure/openshift/self_hosted_models.py create mode 100644 koku/reporting/provider/azure/self_hosted_models.py diff --git a/koku/masu/database/azure_report_db_accessor.py b/koku/masu/database/azure_report_db_accessor.py index c3c14a0132..40084f6cb5 100644 --- a/koku/masu/database/azure_report_db_accessor.py +++ b/koku/masu/database/azure_report_db_accessor.py @@ -91,7 +91,7 @@ def populate_line_item_daily_summary_table_trino(self, start_date, end_date, sou """ sql = pkgutil.get_data( - "masu.database", f"{self.trino_sql_folder_name}/azure/reporting_azurecostentrylineitem_daily_summary.sql" + "masu.database", f"{self.get_sql_folder_name()}/azure/reporting_azurecostentrylineitem_daily_summary.sql" ) sql = sql.decode("utf-8") uuid_str = str(uuid.uuid4()).replace("-", "_") @@ -212,7 +212,7 @@ def populate_ocp_on_azure_ui_summary_tables_trino( for table_name in tables: sql = pkgutil.get_data( - "masu.database", f"{self.trino_sql_folder_name}/azure/openshift/ui_summary/{table_name}.sql" + "masu.database", f"{self.get_sql_folder_name()}/azure/openshift/ui_summary/{table_name}.sql" ) sql = sql.decode("utf-8") sql_params = { @@ -280,12 +280,7 @@ def populate_ocp_on_azure_cost_daily_summary_trino( bill_id, report_period_id, ) - managed_path = f"{self.trino_sql_folder_name}/azure/openshift/populate_daily_summary" - prepare_sql, prepare_params = sql_metadata.prepare_template( - f"{managed_path}/0_prepare_daily_summary_tables.sql" - ) - LOG.info(log_json(msg="Preparing tables for OCP on Azure flow", **prepare_params)) - self._execute_trino_multipart_sql_query(prepare_sql, bind_params=prepare_params) + managed_path = f"{self.get_sql_folder_name()}/azure/openshift/populate_daily_summary" self.delete_ocp_on_azure_hive_partition_by_day( 
sql_metadata.days_tup, sql_metadata.cloud_provider_uuid, @@ -361,7 +356,7 @@ def get_openshift_on_cloud_matched_tags_trino( ): """Return a list of matched tags.""" sql = pkgutil.get_data( - "masu.database", f"{self.trino_sql_folder_name}/azure/openshift/reporting_ocpazure_matched_tags.sql" + "masu.database", f"{self.get_sql_folder_name()}/azure/openshift/reporting_ocpazure_matched_tags.sql" ) sql = sql.decode("utf-8") @@ -445,3 +440,33 @@ def _get_matched_tags_strings(self, bill_id, azure_provider_uuid, ocp_provider_u if matched_tags: return [json.dumps(match).replace("{", "").replace("}", "") for match in matched_tags] return matched_tags + + def delete_self_hosted_data_by_source(self, provider_uuid): + """Delete data from all self-hosted tables by source UUID (for on-prem). + + This deletes data from the line item tables when a source is deleted. + + Args: + provider_uuid: The provider UUID to delete data for + """ + from reporting.provider.azure.self_hosted_models import get_self_hosted_models + + provider_uuid_str = str(provider_uuid) + total_deleted = 0 + + with schema_context(self.schema): + for model in get_self_hosted_models(): + deleted_count, _ = model.objects.filter(source=provider_uuid_str).delete() + + if deleted_count: + LOG.info( + log_json( + msg="deleted self-hosted data by source", + table=model._meta.db_table, + provider_uuid=provider_uuid_str, + deleted_count=deleted_count, + ) + ) + total_deleted += deleted_count + + return total_deleted diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql b/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql new file mode 100755 index 0000000000..53aa51480c --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/1_resource_matching_by_cluster.sql @@ -0,0 +1,169 @@ +DELETE FROM {{schema | sqlsafe}}.managed_azure_openshift_daily_temp +WHERE source = 
{{cloud_provider_uuid}} +AND ocp_source = {{ocp_provider_uuid}} +AND year = {{year}} +AND month = {{month}} +RETURNING 1; + +INSERT INTO {{schema | sqlsafe}}.managed_azure_openshift_daily_temp ( + row_uuid, + usage_start, + resource_id, + service_name, + data_transfer_direction, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + usage_quantity, + currency, + pretax_cost, + date, + metername, + complete_resource_id, + tags, + resource_id_matched, + matched_tag, + source, + ocp_source, + year, + month, + day +) +WITH cte_azure_resource_names AS ( + SELECT DISTINCT resourceid + FROM {{schema | sqlsafe}}.azure_line_items + WHERE source = {{cloud_provider_uuid}} + AND year = {{year}} + AND month = {{month}} + AND date >= {{start_date}} + AND date < {{end_date}} + INTERVAL '1 day' +), +cte_array_agg_nodes AS ( + SELECT DISTINCT node + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily + WHERE source = {{ocp_provider_uuid}} + AND year = {{year}} + AND month = {{month}} + AND interval_start >= {{start_date}} + AND interval_start < {{end_date}} + INTERVAL '1 day' +), +cte_array_agg_volumes AS ( + SELECT DISTINCT persistentvolume, csi_volume_handle + FROM {{schema | sqlsafe}}.openshift_storage_usage_line_items_daily + WHERE source = {{ocp_provider_uuid}} + AND year = {{year}} + AND month = {{month}} + AND interval_start >= {{start_date}} + AND interval_start < {{end_date}} + INTERVAL '1 day' +), +cte_matchable_resource_names AS ( + SELECT resource_names.resourceid + FROM cte_azure_resource_names AS resource_names + JOIN cte_array_agg_nodes AS nodes + ON nodes.node != '' + AND strpos(resource_names.resourceid, nodes.node) != 0 + + UNION + + SELECT resource_names.resourceid + FROM cte_azure_resource_names AS resource_names + JOIN cte_array_agg_volumes AS volumes + ON ( + (volumes.persistentvolume != '' and strpos(resource_names.resourceid, volumes.persistentvolume) != 0) + OR (volumes.csi_volume_handle != '' and 
strpos(resource_names.resourceid, volumes.csi_volume_handle) != 0) + ) + +), +cte_agg_tags AS ( + SELECT array_agg(cte_tag_matches.matched_tag) as matched_tags from ( + SELECT * FROM unnest(CAST(ARRAY{{matched_tag_array | sqlsafe}} AS VARCHAR[])) as t(matched_tag) + ) as cte_tag_matches +), +cte_enabled_tag_keys AS ( + SELECT + CASE WHEN array_agg(key) IS NOT NULL + THEN ARRAY['openshift_cluster', 'openshift_node', 'openshift_project'] || array_agg(key) + ELSE ARRAY['openshift_cluster', 'openshift_node', 'openshift_project'] + END as enabled_keys + FROM {{schema | sqlsafe}}.reporting_enabledtagkeys + WHERE enabled = TRUE + AND provider_type = 'Azure' +) +SELECT + azure.row_uuid, + azure.date as usage_start, + split_part(azure.resourceid, '/', 9) as resource_id, + coalesce(nullif(azure.servicename, ''), azure.metercategory) as service_name, + CASE + WHEN coalesce(nullif(servicename, ''), metercategory) = 'Virtual Network' + AND lower(consumedservice)='microsoft.compute' + AND (lower(additionalinfo)::jsonb ? 
'datatransferdirection') + AND resource_names.resourceid IS NOT NULL + THEN lower(azure.additionalinfo)::jsonb->>'datatransferdirection' + ELSE NULL + END as data_transfer_direction, + (azure.additionalinfo::jsonb->>'ServiceType') as instance_type, + coalesce(nullif(azure.subscriptionid, ''), azure.subscriptionguid) as subscription_guid, + azure.subscriptionname as subscription_name, + azure.resourcelocation as resource_location, + CASE + WHEN split_part(azure.unitofmeasure, ' ', 2) = 'Hours' + THEN 'Hrs' + WHEN split_part(azure.unitofmeasure, ' ', 2) = 'GB/Month' + THEN 'GB-Mo' + WHEN split_part(azure.unitofmeasure, ' ', 2) != '' AND split_part(azure.unitofmeasure, ' ', 3) = '' + THEN split_part(azure.unitofmeasure, ' ', 2) + ELSE azure.unitofmeasure + END as unit_of_measure, + (azure.quantity * ( + CASE + WHEN split_part(azure.unitofmeasure, ' ', 1) ~ '^\d+(\.\d+)?$' + AND NOT (azure.unitofmeasure = '100 Hours' AND azure.metercategory='Virtual Machines') + AND NOT split_part(azure.unitofmeasure, ' ', 2) = '' + THEN cast(split_part(azure.unitofmeasure, ' ', 1) as INTEGER) + ELSE 1 + END) + ) as usage_quantity, + coalesce(nullif(azure.billingcurrencycode, ''), azure.billingcurrency) as currency, + azure.costinbillingcurrency as pretax_cost, + azure.date, + azure.metername, + azure.resourceid as complete_resource_id, + (SELECT json_object_agg(key, value) FROM jsonb_each_text(azure.tags::jsonb) WHERE key = ANY(etk.enabled_keys))::text as tags, -- Limit tag keys to enabled keys + CASE WHEN resource_names.resourceid IS NOT NULL + THEN TRUE + ELSE FALSE + END as resource_id_matched, + array_to_string( + ARRAY( + SELECT tag + FROM unnest(tag_matches.matched_tags) AS tag + WHERE strpos(tags, tag) != 0 + ), + ',' + ) as matched_tag, + azure.source as source, + {{ocp_provider_uuid}} as ocp_source, + azure.year, + azure.month, + EXTRACT(DAY FROM azure.date)::text as day +FROM {{schema | sqlsafe}}.azure_line_items AS azure +CROSS JOIN cte_enabled_tag_keys as etk +LEFT JOIN 
cte_matchable_resource_names AS resource_names + ON azure.resourceid = resource_names.resourceid +LEFT JOIN cte_agg_tags AS tag_matches + ON EXISTS ( + SELECT 1 + FROM unnest(tag_matches.matched_tags) AS matched_tag + WHERE strpos(tags, matched_tag) != 0 + ) + AND resource_names.resourceid IS NULL +WHERE azure.source = {{cloud_provider_uuid}} + AND azure.year = {{year}} + AND azure.month= {{month}} + AND azure.date >= {{start_date}} + AND azure.date < {{end_date}} + INTERVAL '1 day' + AND (resource_names.resourceid IS NOT NULL OR tag_matches.matched_tags IS NOT NULL) +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql b/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql new file mode 100755 index 0000000000..25a37fb8f5 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/2_summarize_data_by_cluster.sql @@ -0,0 +1,721 @@ +DELETE FROM {{schema | sqlsafe}}.managed_azure_openshift_disk_capacities_temp +WHERE ocp_source = {{ocp_provider_uuid}} +AND year = {{year}} +AND month = {{month}} +RETURNING 1; + +INSERT INTO {{schema | sqlsafe}}.managed_azure_openshift_disk_capacities_temp ( + resource_id, + capacity, + usage_start, + ocp_source, + year, + month +) +SELECT + azure.resource_id, + max(az_disk_capacity.capacity) as capacity, + date(azure.date) as usage_start, + {{ocp_provider_uuid}} as ocp_source, + {{year}} as year, + {{month}} as month +FROM {{schema | sqlsafe}}.managed_azure_openshift_daily_temp as azure +JOIN public.reporting_common_diskcapacity as az_disk_capacity + ON azure.metername LIKE '%%' || az_disk_capacity.product_substring || ' %%' -- space here is important to avoid partial matching + AND az_disk_capacity.provider_type = 'Azure' +WHERE azure.date >= TIMESTAMP '{{start_date | sqlsafe}}' + AND azure.date < TIMESTAMP '{{end_date | sqlsafe}}' + INTERVAL '1 day' + AND 
azure.service_name LIKE '%%Storage%%' + AND azure.complete_resource_id LIKE '%%Microsoft.Compute/disks/%%' + AND lower(azure.resource_id) NOT LIKE '%%_osdisk' + AND azure.year = {{year}} + AND azure.month = {{month}} + AND azure.ocp_source = {{ocp_provider_uuid}} + and azure.source = {{cloud_provider_uuid}} + AND azure.resource_id_matched = True +GROUP BY azure.resource_id, date(date) +RETURNING 1; + +DELETE FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary_temp +WHERE ocp_source = {{ocp_provider_uuid}} +AND source = {{cloud_provider_uuid}} +AND year = {{year}} +AND month = {{month}} +RETURNING 1; + +-- resource_id matching +-- Storage disk resources: +-- (PV’s Capacity) / Disk Capacity * Cost of Disk +-- PVs without PVCs are Unattributed Storage +-- 2 volumes can share the same disk id +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + service_name, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + usage_quantity, + currency, + pretax_cost, + markup_cost, + pod_labels, + volume_labels, + tags, + resource_id_matched, + cost_category_id, + source, + ocp_source, + year, + month +) +SELECT uuid_generate_v4()::text as row_uuid, + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as cluster_alias, + 'Storage' as data_source, + CASE + WHEN max(persistentvolumeclaim) = '' + THEN 'Storage unattributed' + ELSE max(namespace) + END as namespace, + max(ocp.node) as node, + max(persistentvolumeclaim) as persistentvolumeclaim, + max(persistentvolume) as persistentvolume, + max(storageclass) as storageclass, + max(azure.resource_id) as resource_id, + max(azure.usage_start) as usage_start, + max(azure.usage_start) as usage_end, +
max(nullif(azure.service_name, '')) as service_name, + max(azure.instance_type) as instance_type, + max(azure.subscription_guid) as subscription_guid, + max(azure.subscription_name) as subscription_name, + max(nullif(azure.resource_location, '')) as resource_location, + 'GB-Mo' as unit_of_measure, -- Has to have this unit to show up on ocp on cloud storage endpoint + max(cast(azure.usage_quantity as decimal(24,9))) as usage_quantity, + max(azure.currency) as currency, + max(persistentvolumeclaim_capacity_gigabyte) / max(az_disk.capacity) * max(azure.pretax_cost) as pretax_cost, + (max(persistentvolumeclaim_capacity_gigabyte) / max(az_disk.capacity) * max(azure.pretax_cost)) * cast({{markup}} as decimal(24,9)) as markup_cost, -- pretax_cost x markup = markup_cost + CASE + WHEN max(persistentvolumeclaim) = '' + THEN cast(NULL as text) + ELSE ocp.pod_labels + END as pod_labels, + CASE + WHEN max(persistentvolumeclaim) = '' + THEN cast(NULL as text) + ELSE ocp.volume_labels + END as volume_labels, + max(azure.tags) as tags, + bool_or(azure.resource_id_matched) as resource_id_matched, + CASE + WHEN max(persistentvolumeclaim) = '' + THEN NULL + ELSE max(ocp.cost_category_id) + END as cost_category_id, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(azure.year) as year, + max(azure.month) as month + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_azure_openshift_daily_temp as azure + ON (azure.usage_start = ocp.usage_start) + AND ( + (strpos(azure.resource_id, ocp.persistentvolume) > 0 AND ocp.data_source = 'Storage') + OR + (lower(ocp.csi_volume_handle) = lower(azure.resource_id)) + ) + AND azure.ocp_source = ocp.source + JOIN {{schema | sqlsafe}}.managed_azure_openshift_disk_capacities_temp AS az_disk + ON az_disk.usage_start = azure.usage_start + AND az_disk.resource_id = azure.resource_id + AND az_disk.ocp_source = azure.ocp_source + WHERE ocp.source = 
{{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + AND ocp.persistentvolume is not null + -- Filter out Node Network Costs because they cannot be tied to namespace level + AND azure.data_transfer_direction IS NULL + AND azure.ocp_source = {{ocp_provider_uuid}} + and azure.source = {{cloud_provider_uuid}} + AND azure.year = {{year}} + AND azure.month = {{month}} + AND azure.resource_id_matched = True + AND ocp.namespace != 'Storage unattributed' + AND az_disk.year = {{year}} + AND az_disk.month = {{month}} + AND az_disk.ocp_source = {{ocp_provider_uuid}} + GROUP BY azure.row_uuid, ocp.namespace, ocp.data_source, ocp.pod_labels, ocp.volume_labels +RETURNING 1; + +-- Unallocated Cost: ((Disk Capacity - Sum(PV capacity)) / Disk Capacity) * Cost of Disk +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + service_name, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + currency, + pretax_cost, + markup_cost, + resource_id_matched, + source, + ocp_source, + year, + month +) +WITH cte_total_pv_capacity as ( + SELECT + azure_resource_id, + SUM(combined_requests.capacity) as total_pv_capacity, + count(distinct cluster_id) as cluster_count + FROM ( + SELECT + ocp.persistentvolume, + max(ocp.persistentvolumeclaim_capacity_gigabyte) as capacity, + azure.resource_id as azure_resource_id, + ocp.cluster_id + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_azure_openshift_daily_temp as azure + ON (azure.usage_start = ocp.usage_start) + AND ( +
(strpos(azure.resource_id, ocp.persistentvolume) > 0 AND ocp.data_source = 'Storage') + OR + (lower(ocp.csi_volume_handle) = lower(azure.resource_id)) + ) + WHERE ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + AND azure.ocp_source = {{ocp_provider_uuid}} + AND azure.source = {{cloud_provider_uuid}} + AND azure.year = {{year}} + AND azure.month = {{month}} + AND azure.resource_id_matched = True + GROUP BY ocp.persistentvolume, azure.resource_id, ocp.cluster_id + ) as combined_requests group by azure_resource_id +) +SELECT uuid_generate_v4()::text as row_uuid, -- need a new uuid or it will deduplicate + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as cluster_alias, + 'Storage' as data_source, + 'Storage unattributed' as namespace, + max(persistentvolumeclaim) as persistentvolumeclaim, + max(persistentvolume) as persistentvolume, + max(storageclass) as storageclass, + max(azure.resource_id) as resource_id, + max(azure.usage_start) as usage_start, + max(azure.usage_start) as usage_end, + max(nullif(azure.service_name, '')) as service_name, + max(azure.instance_type) as instance_type, + max(azure.subscription_guid) as subscription_guid, + max(azure.subscription_name) as subscription_name, + max(nullif(azure.resource_location, '')) as resource_location, + 'GB-Mo' as unit_of_measure, -- Has to have this unit to show up on storage endpoint + max(azure.currency) as currency, + (max(az_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(az_disk.capacity) * max(azure.pretax_cost) / max(pv_cap.cluster_count) as pretax_cost, + ((max(az_disk.capacity) - max(pv_cap.total_pv_capacity)) / max(az_disk.capacity) * max(azure.pretax_cost)) * cast({{markup}} as decimal(24,9)) / max(pv_cap.cluster_count) as markup_cost, -- pretax_cost x markup = markup_cost + bool_or(azure.resource_id_matched) as resource_id_matched, + {{cloud_provider_uuid}} as source, + 
{{ocp_provider_uuid}} as ocp_source, + max(azure.year) as year, + max(azure.month) as month + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_azure_openshift_daily_temp as azure + ON (azure.usage_start = ocp.usage_start) + AND ( + (strpos(azure.resource_id, ocp.persistentvolume) > 0 AND ocp.data_source = 'Storage') + OR + (lower(ocp.csi_volume_handle) = lower(azure.resource_id)) + ) + AND azure.ocp_source = ocp.source + JOIN {{schema | sqlsafe}}.managed_azure_openshift_disk_capacities_temp AS az_disk + ON az_disk.usage_start = azure.usage_start + AND az_disk.resource_id = azure.resource_id + AND az_disk.ocp_source = azure.ocp_source + LEFT JOIN cte_total_pv_capacity as pv_cap + ON pv_cap.azure_resource_id = azure.resource_id + WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + AND azure.ocp_source = {{ocp_provider_uuid}} + AND azure.source = {{cloud_provider_uuid}} + AND azure.year = {{year}} + AND azure.month = {{month}} + AND azure.resource_id_matched = True + AND ocp.namespace != 'Storage unattributed' + AND az_disk.capacity != pv_cap.total_pv_capacity -- prevent inserting zero cost rows + AND az_disk.year = {{year}} + AND az_disk.month = {{month}} + GROUP BY azure.row_uuid, ocp.data_source, azure.resource_id +RETURNING 1; + +-- Directly Pod resource_id matching +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + service_name, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + usage_quantity, + currency, + pretax_cost, + markup_cost, + 
pod_effective_usage_cpu_core_hours, + pod_effective_usage_memory_gigabyte_hours, + node_capacity_cpu_core_hours, + node_capacity_memory_gigabyte_hours, + pod_labels, + volume_labels, + tags, + resource_id_matched, + cost_category_id, + source, + ocp_source, + year, + month +) +SELECT azure.row_uuid as row_uuid, + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as cluster_alias, + ocp.data_source, + ocp.namespace as namespace, + max(ocp.node) as node, + max(persistentvolumeclaim) as persistentvolumeclaim, + max(persistentvolume) as persistentvolume, + max(storageclass) as storageclass, + max(azure.resource_id) as resource_id, + max(azure.usage_start) as usage_start, + max(azure.usage_start) as usage_end, + max(nullif(azure.service_name, '')) as service_name, + max(azure.instance_type) as instance_type, + max(azure.subscription_guid) as subscription_guid, + max(azure.subscription_name) as subscription_name, + max(nullif(azure.resource_location, '')) as resource_location, + max(azure.unit_of_measure) as unit_of_measure, + max(cast(azure.usage_quantity as decimal(24,9))) as usage_quantity, + max(azure.currency) as currency, + max(cast(azure.pretax_cost as decimal(24,9))) as pretax_cost, + max(cast(azure.pretax_cost as decimal(24,9))) * cast({{markup}} as decimal(24,9)) as markup_cost, -- pretax_cost x markup = markup_cost + sum(ocp.pod_effective_usage_cpu_core_hours) as pod_effective_usage_cpu_core_hours, + sum(ocp.pod_effective_usage_memory_gigabyte_hours) as pod_effective_usage_memory_gigabyte_hours, + max(ocp.node_capacity_cpu_core_hours) as node_capacity_cpu_core_hours, + max(ocp.node_capacity_memory_gigabyte_hours) as node_capacity_memory_gigabyte_hours, + ocp.pod_labels, + ocp.volume_labels, + max(azure.tags) as tags, + bool_or(azure.resource_id_matched) as resource_id_matched, + max(ocp.cost_category_id) as cost_category_id, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(azure.year) as year, + max(azure.month) as 
month + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_azure_openshift_daily_temp as azure + ON (azure.usage_start = ocp.usage_start) + AND ( + (replace(lower(azure.resource_id), '_osdisk', '') = lower(ocp.node) AND ocp.data_source = 'Pod') + OR (strpos(azure.resource_id, ocp.persistentvolume) > 0 AND ocp.data_source = 'Storage') + ) + AND ocp.source = azure.ocp_source + LEFT JOIN {{schema | sqlsafe}}.managed_azure_openshift_disk_capacities_temp as disk_cap + ON azure.resource_id = disk_cap.resource_id + AND disk_cap.year = azure.year + AND disk_cap.month = azure.month + AND disk_cap.ocp_source = azure.ocp_source + WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '') + AND azure.resource_id_matched = True + -- Filter out Node Network Costs because they cannot be tied to namespace level + AND azure.data_transfer_direction IS NULL + AND azure.ocp_source = {{ocp_provider_uuid}} + AND azure.source = {{cloud_provider_uuid}} + AND azure.year = {{year}} + AND azure.month = {{month}} + AND disk_cap.resource_id is NULL -- exclude any resource used in disk capacity calculations + GROUP BY azure.row_uuid, ocp.namespace, ocp.data_source, ocp.pod_labels, ocp.volume_labels +RETURNING 1; + +-- Tag matching +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary_temp ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + service_name, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + usage_quantity, + currency, + pretax_cost, + 
markup_cost, + pod_labels, + volume_labels, + tags, + matched_tag, + resource_id_matched, + cost_category_id, + source, + ocp_source, + year, + month +) +SELECT azure.row_uuid, + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as cluster_alias, + ocp.data_source, + ocp.namespace, + max(ocp.node) as node, + max(nullif(ocp.persistentvolumeclaim, '')) as persistentvolumeclaim, + max(nullif(ocp.persistentvolume, '')) as persistentvolume, + max(nullif(ocp.storageclass, '')) as storageclass, + max(azure.resource_id) as resource_id, + max(azure.usage_start) as usage_start, + max(azure.usage_start) as usage_end, + max(nullif(azure.service_name, '')) as service_name, + max(azure.instance_type) as instance_type, + max(azure.subscription_guid) as subscription_guid, + max(subscription_name) as subscription_name, + max(nullif(azure.resource_location, '')) as resource_location, + max(azure.unit_of_measure) as unit_of_measure, + max(cast(azure.usage_quantity as decimal(24,9))) as usage_quantity, + max(azure.currency) as currency, + max(cast(azure.pretax_cost as decimal(24,9))) as pretax_cost, + max(cast(azure.pretax_cost as decimal(24,9))) * cast({{markup}} as decimal(24,9)) as markup_cost, -- pretax_cost x markup = markup_cost + max(ocp.pod_labels) as pod_labels, + max(ocp.volume_labels) as volume_labels, + max(azure.tags) as tags, + max(azure.matched_tag) as matched_tag, + FALSE as resource_id_matched, + max(ocp.cost_category_id) as cost_category_id, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(azure.year) as year, + max(azure.month) as month + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_azure_openshift_daily_temp as azure + ON azure.usage_start = ocp.usage_start + AND ( + azure.tags::jsonb->>'openshift_project' = ocp.namespace + OR azure.tags::jsonb->>'openshift_node' = ocp.node + OR azure.tags::jsonb->>'openshift_cluster' = ocp.cluster_alias + OR 
azure.tags::jsonb->>'openshift_cluster' = ocp.cluster_id + OR (azure.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(azure.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.pod_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + OR (azure.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(azure.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.volume_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + ) + AND namespace != 'Worker unallocated' + AND namespace != 'Platform unallocated' + AND namespace != 'Storage unattributed' + AND namespace != 'Network unattributed' + AND azure.ocp_source = ocp.source + WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + AND azure.ocp_source = {{ocp_provider_uuid}} + AND azure.source = {{cloud_provider_uuid}} + AND azure.year = {{year}} + AND azure.month = {{month}} + AND azure.matched_tag != '' + AND azure.resource_id_matched = False + GROUP BY azure.row_uuid, ocp.namespace, ocp.data_source +RETURNING 1; + +{%- if distribution == 'cpu' -%} +{%- set pod_column = 'pod_effective_usage_cpu_core_hours' -%} +{%- set node_column = 'node_capacity_cpu_core_hours' -%} +{%- else -%} +{%- set pod_column = 'pod_effective_usage_memory_gigabyte_hours' -%} +{%- set node_column = 'node_capacity_memory_gigabyte_hours' -%} +{%- endif -%} +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + service_name, + data_transfer_direction, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + usage_quantity, + currency, + pretax_cost, + markup_cost, + 
tags, + resource_id_matched, + matched_tag, + cost_category_id, + source, + ocp_source, + year, + month, + day +) +WITH cte_cluster_counts AS ( + -- Count distinct clusters matching each Azure resource for tag-matched resources + SELECT azure_temp.row_uuid, + count(DISTINCT ocp.source) as cluster_count + FROM {{schema | sqlsafe}}.managed_azure_openshift_daily_temp AS azure_temp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + ON azure_temp.usage_start = ocp.usage_start + AND ( + azure_temp.tags::jsonb->>'openshift_project' = ocp.namespace + OR azure_temp.tags::jsonb->>'openshift_node' = ocp.node + OR azure_temp.tags::jsonb->>'openshift_cluster' = ocp.cluster_alias + OR azure_temp.tags::jsonb->>'openshift_cluster' = ocp.cluster_id + OR (azure_temp.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(azure_temp.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.pod_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + OR (azure_temp.matched_tag != '' AND EXISTS (SELECT 1 FROM unnest(string_to_array(azure_temp.matched_tag, ',')) AS tag WHERE strpos(replace(ocp.volume_labels, ' ', ''), replace(tag, ' ', '')) != 0)) + ) + AND ocp.namespace != 'Worker unallocated' + AND ocp.namespace != 'Platform unallocated' + AND ocp.namespace != 'Storage unattributed' + AND ocp.namespace != 'Network unattributed' + WHERE azure_temp.source = {{cloud_provider_uuid}} + AND azure_temp.year = {{year}} + AND azure_temp.month = {{month}} + AND azure_temp.resource_id_matched = FALSE + AND azure_temp.matched_tag IS NOT NULL + AND azure_temp.matched_tag != '' + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + -- Don't filter by ocp.source here - we want to count ALL matching OCP sources + GROUP BY azure_temp.row_uuid +), +cte_rankings AS ( + SELECT pds.row_uuid, + count(*) as azure_uuid_count, + COALESCE(ccc.cluster_count, 1) as 
cluster_count + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary_temp AS pds + LEFT JOIN cte_cluster_counts AS ccc + ON pds.row_uuid = ccc.row_uuid + WHERE pds.ocp_source = {{ocp_provider_uuid}} AND year = {{year}} AND month = {{month}} + GROUP BY pds.row_uuid, ccc.cluster_count +) +SELECT pds.row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + service_name, + NULL as data_transfer_direction, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + -- For tag-matched resources, split cost across clusters; for resource_id-matched, only split within cluster + CASE WHEN pds.resource_id_matched = FALSE + THEN usage_quantity / (r.azure_uuid_count * r.cluster_count) + ELSE usage_quantity / r.azure_uuid_count + END as usage_quantity, + currency, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * pretax_cost + WHEN resource_id_matched = FALSE + THEN pretax_cost / (r.azure_uuid_count * r.cluster_count) + ELSE pretax_cost / r.azure_uuid_count + END as pretax_cost, + CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' + THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * pretax_cost * cast({{markup}} as decimal(24,9)) + WHEN resource_id_matched = FALSE + THEN pretax_cost / (r.azure_uuid_count * r.cluster_count) * cast({{markup}} as decimal(24,9)) + ELSE pretax_cost / r.azure_uuid_count * cast({{markup}} as decimal(24,9)) + END as markup_cost, + CASE WHEN pds.pod_labels IS NOT NULL + THEN ( + SELECT json_object_agg(key, value)::text + FROM ( + SELECT * FROM jsonb_each_text(pds.pod_labels::jsonb) + UNION ALL + SELECT * FROM jsonb_each_text(pds.tags::jsonb) + ) combined(key, value) + ) + ELSE ( + SELECT json_object_agg(key, value)::text + FROM ( + SELECT 
* FROM jsonb_each_text(pds.volume_labels::jsonb) + UNION ALL + SELECT * FROM jsonb_each_text(pds.tags::jsonb) + ) combined(key, value) + ) + END as tags, + pds.resource_id_matched as resource_id_matched, + pds.matched_tag as matched_tag, + cost_category_id, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + pds.year as year, + pds.month as month, + EXTRACT(DAY FROM usage_start)::text as day +FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary_temp AS pds +JOIN cte_rankings as r + ON pds.row_uuid = r.row_uuid +WHERE + pds.ocp_source = {{ocp_provider_uuid}} + AND pds.year = {{year}} + AND pds.month = {{month}} + AND pds.source = {{cloud_provider_uuid}} +RETURNING 1; + +-- Network costs are currently not mapped to pod metrics +-- and are filtered out of the above SQL since that is grouped by namespace +-- and costs are split out by pod metrics, this puts all network costs per node +-- into a "Network unattributed" project with no cost split and one record per +-- data direction +INSERT INTO {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary ( + row_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + service_name, + data_transfer_direction, + instance_type, + subscription_guid, + subscription_name, + resource_location, + unit_of_measure, + usage_quantity, + currency, + pretax_cost, + markup_cost, + tags, + source, + ocp_source, + year, + month, + day +) +SELECT azure.row_uuid as row_uuid, + max(ocp.cluster_id) as cluster_id, + max(ocp.cluster_alias) as cluster_alias, + max(ocp.data_source) as data_source, + 'Network unattributed' as namespace, + ocp.node as node, + max(nullif(ocp.persistentvolumeclaim, '')) as persistentvolumeclaim, + max(nullif(ocp.persistentvolume, '')) as persistentvolume, + max(nullif(ocp.storageclass, '')) as storageclass, + 
max(azure.resource_id) as resource_id, + max(azure.usage_start) as usage_start, + max(azure.usage_start) as usage_end, + max(nullif(azure.service_name, '')) as service_name, + max(data_transfer_direction) as data_transfer_direction, + max(azure.instance_type) as instance_type, + max(azure.subscription_guid) as subscription_guid, + max(azure.subscription_name) as subscription_name, + max(nullif(azure.resource_location, '')) as resource_location, + max(azure.unit_of_measure) as unit_of_measure, + max(cast(azure.usage_quantity as decimal(24,9))) as usage_quantity, + max(azure.currency) as currency, + max(cast(azure.pretax_cost as decimal(24,9))) as pretax_cost, + max(cast(azure.pretax_cost as decimal(24,9))) * cast({{markup}} as decimal(24,9)) as markup_cost, -- pretax_cost x markup = markup_cost + max(azure.tags) as tags, + {{cloud_provider_uuid}} as source, + {{ocp_provider_uuid}} as ocp_source, + max(azure.year) as year, + max(azure.month) as month, + cast(EXTRACT(DAY FROM max(azure.usage_start)) as varchar) as day + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging as ocp + JOIN {{schema | sqlsafe}}.managed_azure_openshift_daily_temp as azure + ON azure.usage_start = ocp.usage_start + AND (azure.resource_id = ocp.node AND ocp.data_source = 'Pod') + AND azure.ocp_source = ocp.source + WHERE ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.usage_start >= {{start_date}} + AND ocp.usage_start < {{end_date}} + INTERVAL '1 day' + AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '') + -- Filter for Node Network Costs to tie them to the Network unattributed project + AND azure.data_transfer_direction IS NOT NULL + AND azure.data_transfer_direction != '' + AND azure.ocp_source = {{ocp_provider_uuid}} + AND azure.source = {{cloud_provider_uuid}} + AND azure.year = {{year}} + AND azure.month = {{month}} + AND 
azure.resource_id_matched = True + GROUP BY azure.row_uuid, ocp.node +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/3_reporting_ocpazurecostlineitem_project_daily_summary_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/3_reporting_ocpazurecostlineitem_project_daily_summary_p.sql new file mode 100755 index 0000000000..7ad75be4e9 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/populate_daily_summary/3_reporting_ocpazurecostlineitem_project_daily_summary_p.sql @@ -0,0 +1,138 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_summary_p ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + pod_labels, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + subscription_name, + instance_type, + service_name, + infrastructure_data_in_gigabytes, + infrastructure_data_out_gigabytes, + data_transfer_direction, + resource_location, + usage_quantity, + unit_of_measure, + currency, + pretax_cost, + markup_cost, + tags, + cost_category_id, + source_uuid +) +with cte_pg_enabled_keys as ( + select array['vm_kubevirt_io_name'] || array_agg(key order by key) as keys + from {{schema | sqlsafe}}.reporting_enabledtagkeys + where enabled = true + and provider_type IN ('Azure', 'OCP') +), +filtered_data as ( + SELECT cluster_id as cluster_id, + cluster_alias as cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + date(usage_start) as usage_start, + date(usage_end), + subscription_guid, + subscription_name, + instance_type, + service_name, + CASE + WHEN lower(data_transfer_direction) = 'datatrin' THEN usage_quantity + ELSE 0 + END as infrastructure_data_in_gigabytes, + CASE + WHEN lower(data_transfer_direction) = 'datatrout' THEN 
usage_quantity + ELSE 0 + END as infrastructure_data_out_gigabytes, + -- gives each row a unique identifier for group by during back populate + CASE + WHEN lower(data_transfer_direction) = 'datatrin' THEN 'IN' + WHEN lower(data_transfer_direction) = 'datatrout' THEN 'OUT' + ELSE NULL + END as data_transfer_direction, + resource_location, + usage_quantity, + unit_of_measure, + currency, + pretax_cost, + markup_cost, + (SELECT json_object_agg(key, value) FROM jsonb_each_text(tags::jsonb) WHERE key = ANY(pek.keys))::jsonb AS enabled_tags, + cost_category_id + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + CROSS JOIN cte_pg_enabled_keys AS pek + WHERE source = {{cloud_provider_uuid}} + AND ocp_source = {{ocp_provider_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} + AND day in {{days | inclause}} +) +SELECT + uuid_generate_v4(), + MAX({{report_period_id | sqlsafe}}) as report_period_id, + cluster_id, + MAX(cluster_alias) as cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + fd.enabled_tags as pod_labels, + resource_id, + fd.usage_start as usage_start, + fd.usage_start as usage_end, + MAX({{bill_id | sqlsafe}}) as cost_entry_bill_id, + subscription_guid, + MAX(subscription_name) as subscription_name, + instance_type, + service_name, + SUM(fd.infrastructure_data_in_gigabytes) as infrastructure_data_in_gigabytes, + SUM(fd.infrastructure_data_out_gigabytes) as infrastructure_data_out_gigabytes, + fd.data_transfer_direction as data_transfer_direction, + resource_location, + SUM(usage_quantity) as usage_quantity, + MAX(unit_of_measure) as unit_of_measure, + MAX(currency) as currency, + SUM(pretax_cost) as pretax_cost, + SUM(markup_cost) as markup_cost, + fd.enabled_tags as tags, + cost_category_id, + {{cloud_provider_uuid}}::uuid as source_uuid +FROM filtered_data as fd +GROUP BY + fd.usage_start, + cluster_id, + data_source, + namespace, + node, + 
persistentvolumeclaim, + persistentvolume, + storageclass, + fd.enabled_tags, + resource_id, + subscription_guid, + instance_type, + service_name, + fd.data_transfer_direction, + resource_location, + unit_of_measure, + currency, + cost_category_id +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/reporting_ocpazure_matched_tags.sql b/koku/masu/database/self_hosted_sql/azure/openshift/reporting_ocpazure_matched_tags.sql new file mode 100755 index 0000000000..cb61dc7225 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/reporting_ocpazure_matched_tags.sql @@ -0,0 +1,65 @@ +-- Returns one single-pair JSON object string per key/value that appears (case-insensitively) in both the Azure tags and the OCP pod or volume labels, considering only rows whose tag text mentions a key enabled for both provider types +WITH cte_enabled_tag_keys AS ( + SELECT array_agg(key) as key_array + FROM ( + SELECT key, + count(provider_type) AS p_count + FROM {{schema | sqlsafe}}.reporting_enabledtagkeys + WHERE enabled = true + AND provider_type IN ('Azure', 'OCP') + GROUP BY key + ) c + WHERE c.p_count > 1 +), +cte_unnested_azure_tags AS ( + SELECT DISTINCT key, + value + FROM {{schema | sqlsafe}}.azure_line_items AS azure + CROSS JOIN LATERAL jsonb_each_text(azure.tags::jsonb) AS tags(key, value) + JOIN cte_enabled_tag_keys AS etk + ON EXISTS ( + SELECT 1 + FROM unnest(etk.key_array) AS enabled_key + WHERE strpos(azure.tags, enabled_key) != 0 + ) + WHERE source = {{azure_source_uuid}} + AND year = {{year}} + AND month = {{month}} + AND date >= {{start_date}} + AND date < {{end_date}} + INTERVAL '1 day' +), +cte_unnested_ocp_tags AS ( + SELECT DISTINCT pod_key, + pod_value, + volume_key, + volume_value + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary_staging AS ocp + LEFT JOIN LATERAL jsonb_each_text(COALESCE(ocp.pod_labels::jsonb, '{}'::jsonb)) AS pod_tags(pod_key, pod_value) ON TRUE -- LEFT JOIN keeps the row (with NULL pod_key) when pod_labels is empty or NULL + LEFT JOIN LATERAL jsonb_each_text(COALESCE(ocp.volume_labels::jsonb, '{}'::jsonb)) AS volume_tags(volume_key, volume_value) ON TRUE -- LEFT JOIN keeps the row (with NULL volume_key) when volume_labels is empty or NULL, a cross join would drop its pod labels too + JOIN cte_enabled_tag_keys AS etk + ON EXISTS ( + SELECT 1 + FROM unnest(etk.key_array) AS enabled_key + WHERE strpos(ocp.pod_labels, enabled_key) != 0 
+ OR strpos(ocp.volume_labels, enabled_key) != 0 + ) + WHERE source IN {{ocp_source_uuids | inclause}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} + AND day IN {{days | inclause}} +) +SELECT jsonb_build_object(key, value)::text as tag -- jsonb_build_object escapes quotes and backslashes that plain string concatenation would emit raw +FROM ( + SELECT DISTINCT azure.key, + azure.value + FROM cte_unnested_azure_tags AS azure + JOIN cte_unnested_ocp_tags AS ocp + ON ( + lower(azure.key) = lower(ocp.pod_key) + AND lower(azure.value) = lower(ocp.pod_value) + ) + OR ( + lower(azure.key) = lower(ocp.volume_key) + AND lower(azure.value) = lower(ocp.volume_value) + ) +) AS matches diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_compute_summary_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_compute_summary_p.sql new file mode 100755 index 0000000000..b0e0562496 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_compute_summary_p.sql @@ -0,0 +1,44 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_compute_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + subscription_guid, + subscription_name, + instance_type, + resource_id, + usage_quantity, + unit_of_measure, + pretax_cost, + markup_cost, + currency, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + subscription_guid, + max(subscription_name), + instance_type, + resource_id, + sum(usage_quantity) as usage_quantity, + max(unit_of_measure) as unit_of_measure, + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = 
{{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + AND instance_type IS NOT NULL + AND unit_of_measure = 'Hrs' + GROUP BY usage_start, subscription_guid, instance_type, resource_id +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_account_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_account_p.sql new file mode 100755 index 0000000000..a29bd91fa7 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_account_p.sql @@ -0,0 +1,34 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_cost_summary_by_account_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + subscription_guid, + subscription_name, + pretax_cost, + markup_cost, + currency, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start as usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + subscription_guid, + max(subscription_name), + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + GROUP BY usage_start, subscription_guid +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_location_p.sql 
b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_location_p.sql new file mode 100755 index 0000000000..ac651a5564 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_location_p.sql @@ -0,0 +1,36 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_cost_summary_by_location_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + subscription_guid, + subscription_name, + resource_location, + pretax_cost, + markup_cost, + currency, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start as usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + subscription_guid, + max(subscription_name), + resource_location, + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + GROUP BY usage_start, subscription_guid, resource_location +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_service_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_service_p.sql new file mode 100755 index 0000000000..1a9a623fdb --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_by_service_p.sql @@ -0,0 +1,36 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_cost_summary_by_service_p ( + id, + usage_start, + usage_end, + cluster_id, + 
cluster_alias, + subscription_guid, + subscription_name, + service_name, + pretax_cost, + markup_cost, + currency, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start as usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + subscription_guid, + max(subscription_name), + service_name, + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + GROUP BY usage_start, subscription_guid, service_name +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_p.sql new file mode 100755 index 0000000000..b52866ed84 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_cost_summary_p.sql @@ -0,0 +1,32 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_cost_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + pretax_cost, + markup_cost, + currency, + source_uuid, + cost_category_id +) + SELECT uuid_generate_v4() as id, + usage_start as usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid, + max(cost_category_id) as cost_category_id + FROM {{schema | 
sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + GROUP BY usage_start +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_database_summary_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_database_summary_p.sql new file mode 100755 index 0000000000..bcbbcf4583 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_database_summary_p.sql @@ -0,0 +1,44 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_database_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + subscription_guid, + subscription_name, + service_name, + usage_quantity, + unit_of_measure, + pretax_cost, + markup_cost, + currency, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start as usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + subscription_guid, + max(subscription_name), + service_name, + sum(usage_quantity) as usage_quantity, + max(unit_of_measure) as unit_of_measure, + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + AND ( + 
service_name IN ('Cosmos DB','Cache for Redis') + OR lower(service_name) LIKE '%%database%%' + ) + GROUP BY usage_start, subscription_guid, service_name +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_network_summary_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_network_summary_p.sql new file mode 100755 index 0000000000..3ce16b1640 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_network_summary_p.sql @@ -0,0 +1,41 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_network_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + subscription_guid, + subscription_name, + service_name, + usage_quantity, + unit_of_measure, + pretax_cost, + markup_cost, + currency, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start as usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + subscription_guid, + max(subscription_name), + service_name, + sum(usage_quantity) as usage_quantity, + max(unit_of_measure) as unit_of_measure, + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + AND service_name IN ('Virtual Network','VPN','DNS','Traffic Manager','ExpressRoute','Load Balancer','Application Gateway') + GROUP BY usage_start, subscription_guid, service_name +RETURNING 1; diff --git 
a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_storage_summary_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_storage_summary_p.sql new file mode 100755 index 0000000000..8f132b1535 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazure_storage_summary_p.sql @@ -0,0 +1,42 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazure_storage_summary_p ( + id, + usage_start, + usage_end, + cluster_id, + cluster_alias, + subscription_guid, + subscription_name, + service_name, + usage_quantity, + unit_of_measure, + pretax_cost, + markup_cost, + currency, + source_uuid +) + SELECT uuid_generate_v4() as id, + usage_start as usage_start, + usage_start as usage_end, + max(cluster_id) as cluster_id, + max(cluster_alias) as cluster_alias, + subscription_guid, + max(subscription_name) as subscription_name, + service_name, + sum(usage_quantity) as usage_quantity, + max(unit_of_measure) as unit_of_measure, + sum(pretax_cost) as pretax_cost, + sum(markup_cost) as markup_cost, + max(currency) as currency, + {{azure_source_uuid}}::uuid as source_uuid + FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary + WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} + AND usage_start >= {{start_date}} + AND usage_start <= {{end_date}} + INTERVAL '1 day' + AND service_name LIKE '%%Storage%%' + AND unit_of_measure = 'GB-Mo' + GROUP BY usage_start, subscription_guid, service_name +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazurecostlineitem_project_daily_summary_p.sql b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazurecostlineitem_project_daily_summary_p.sql new file mode 100755 index 
0000000000..77dded9dbc --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/openshift/ui_summary/reporting_ocpazurecostlineitem_project_daily_summary_p.sql @@ -0,0 +1,91 @@ +-- insert managed table data into postgres table + +INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_summary_p ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + pod_labels, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + subscription_name, + instance_type, + service_name, + infrastructure_data_in_gigabytes, + infrastructure_data_out_gigabytes, + data_transfer_direction, + resource_location, + usage_quantity, + unit_of_measure, + currency, + pretax_cost, + markup_cost, + tags, + cost_category_id, + source_uuid +) +with cte_pg_enabled_keys as ( + select array['vm_kubevirt_io_name'] || array_agg(key order by key) as keys + from {{schema | sqlsafe}}.reporting_enabledtagkeys + where enabled = true + and provider_type IN ('Azure', 'OCP') +) +SELECT uuid_generate_v4(), + {{report_period_id | sqlsafe}} as report_period_id, + cluster_id as cluster_id, + cluster_alias as cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + pod_labels::jsonb, + resource_id, + date(usage_start), + date(usage_end), + {{bill_id | sqlsafe}} as cost_entry_bill_id, + subscription_guid, + subscription_name, + instance_type, + service_name, + CASE + WHEN lower(data_transfer_direction) = 'datatrin' THEN usage_quantity + ELSE 0 + END as infrastructure_data_in_gigabytes, + CASE + WHEN lower(data_transfer_direction) = 'datatrout' THEN usage_quantity + ELSE 0 + END as infrastructure_data_out_gigabytes, + -- gives each row a unique identifier for group by during back populate + CASE + WHEN lower(data_transfer_direction) = 'datatrin' THEN 'IN' + WHEN lower(data_transfer_direction) = 
'datatrout' THEN 'OUT' + ELSE NULL + END as data_transfer_direction, + resource_location, + usage_quantity, + unit_of_measure, + currency, + pretax_cost, + markup_cost, + (SELECT json_object_agg(key, value) FROM jsonb_each_text(tags::jsonb) WHERE key = ANY(pek.keys))::jsonb AS tags, + cost_category_id, + source::UUID +FROM {{schema | sqlsafe}}.managed_reporting_ocpazurecostlineitem_project_daily_summary +CROSS JOIN cte_pg_enabled_keys AS pek +WHERE source = {{azure_source_uuid}} + AND ocp_source = {{ocp_source_uuid}} + AND year = {{year}} + AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND day in {{days | inclause}} +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/reporting_azurecostentrylineitem_daily_summary.sql b/koku/masu/database/self_hosted_sql/azure/reporting_azurecostentrylineitem_daily_summary.sql new file mode 100755 index 0000000000..768c7d43ea --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/reporting_azurecostentrylineitem_daily_summary.sql @@ -0,0 +1,94 @@ +INSERT INTO {{schema | sqlsafe}}.reporting_azurecostentrylineitem_daily_summary ( + uuid, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + resource_location, + service_name, + instance_type, + pretax_cost, + usage_quantity, + unit_of_measure, + currency, + tags, + instance_ids, + instance_count, + source_uuid, + markup_cost, + subscription_name +) +WITH cte_line_items AS ( + SELECT date(date) as usage_date, + INTEGER '{{bill_id | sqlsafe}}' as cost_entry_bill_id, + coalesce(nullif(subscriptionid, ''), subscriptionguid) as subscription_guid, + resourcelocation as resource_location, + coalesce(nullif(servicename, ''), metercategory) as service_name, + (additionalinfo::jsonb->>'ServiceType') as instance_type, + cast(quantity as DECIMAL(24,9)) as usage_quantity, + cast(costinbillingcurrency as DECIMAL(24,9)) as pretax_cost, + coalesce(nullif(billingcurrencycode, ''), billingcurrency) as currency, + 
tags::jsonb as tags, + resourceid as instance_id, + source::uuid as source_uuid, + coalesce(nullif(subscriptionname, ''), nullif(subscriptionid, ''), subscriptionguid) as subscription_name, + CASE + WHEN split_part(unitofmeasure, ' ', 1) ~ '^\d+(\.\d+)?$' AND NOT (unitofmeasure = '100 Hours' AND metercategory='Virtual Machines') AND NOT split_part(unitofmeasure, ' ', 2) = '' + THEN cast(split_part(unitofmeasure, ' ', 1) as DECIMAL(24,9)) -- the regex above admits fractional prefixes such as 0.5, which an INTEGER cast would reject + ELSE 1 + END as multiplier, + CASE + WHEN split_part(unitofmeasure, ' ', 2) IN ('Hours', 'Hour') + THEN 'Hrs' + WHEN split_part(unitofmeasure, ' ', 2) = 'GB/Month' + THEN 'GB-Mo' + WHEN split_part(unitofmeasure, ' ', 2) != '' AND split_part(unitofmeasure, ' ', 3) = '' + THEN split_part(unitofmeasure, ' ', 2) + ELSE unitofmeasure + END as unit_of_measure + FROM {{schema | sqlsafe}}.azure_line_items + WHERE source = '{{source_uuid | sqlsafe}}' + AND year = '{{year | sqlsafe}}' + AND month = '{{month | sqlsafe}}' + AND date >= '{{start_date | sqlsafe}}'::timestamp + AND date < '{{end_date | sqlsafe}}'::timestamp + INTERVAL '1 day' +), +cte_pg_enabled_keys as ( + select array_agg(key order by key) as keys + from {{schema | sqlsafe}}.reporting_enabledtagkeys + where enabled = true + and provider_type = 'Azure' +) +SELECT uuid_generate_v4() as uuid, + li.usage_date AS usage_start, + li.usage_date AS usage_end, + li.cost_entry_bill_id, + li.subscription_guid, -- account ID + li.resource_location, -- region + li.service_name, -- service + li.instance_type, + -- Azure meters usage in large blocks e.g. 
blocks of 100 Hours + -- We normalize this down to Hours and multiply the usage appropriately + sum(li.pretax_cost) AS pretax_cost, + sum(li.usage_quantity * li.multiplier) AS usage_quantity, + max(li.unit_of_measure) as unit_of_measure, + max(li.currency) as currency, + (SELECT json_object_agg(key, value) FROM jsonb_each_text(li.tags::jsonb) WHERE key = ANY(pek.keys))::jsonb as tags, + array_agg(DISTINCT li.instance_id) as instance_ids, + count(DISTINCT li.instance_id) as instance_count, + li.source_uuid, + sum(cast(li.pretax_cost * {{markup | sqlsafe}} AS decimal(24,9))) as markup_cost, + li.subscription_name -- account name +FROM cte_line_items AS li +CROSS JOIN + cte_pg_enabled_keys as pek +GROUP BY li.usage_date, + li.cost_entry_bill_id, + 13, -- ordinal of the filtered tags expression in the SELECT list above + li.subscription_guid, + li.resource_location, + li.instance_type, + li.service_name, + li.source_uuid, + li.subscription_name +RETURNING 1; diff --git a/koku/masu/database/self_hosted_sql/azure/reporting_ocpinfrastructure_provider_map.sql b/koku/masu/database/self_hosted_sql/azure/reporting_ocpinfrastructure_provider_map.sql new file mode 100755 index 0000000000..6049857284 --- /dev/null +++ b/koku/masu/database/self_hosted_sql/azure/reporting_ocpinfrastructure_provider_map.sql @@ -0,0 +1,61 @@ +-- Maps OCP sources to Azure sources by matching OCP node names to field 9 of the slash-delimited Azure resourceid +WITH cte_azure_instances AS ( + SELECT DISTINCT split_part(azure.resourceid, '/', 9) as instance, + azure.source + FROM {{schema | sqlsafe}}.azure_line_items AS azure + WHERE azure.date >= {{start_date}} + AND azure.date < {{end_date}} + INTERVAL '1 day' -- NOTE(review): end_date must bind as a date/timestamp, untyped text plus INTERVAL fails in PostgreSQL, confirm caller + {% if azure_provider_uuid -%} + AND azure.source = {{azure_provider_uuid}} + {% endif -%} + AND azure.year = {{year}} + AND azure.month = {{month}} +), +cte_ocp_nodes AS ( + {% if ocp_provider_uuid -%} + SELECT DISTINCT ocp.node, + ocp.source + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily AS ocp + WHERE ocp.interval_start >= {{start_date}} + AND ocp.interval_start < {{end_date}} + INTERVAL '1 day' + AND ocp.node IS 
NOT NULL + AND ocp.node != '' + AND ocp.source = {{ocp_provider_uuid}} + AND ocp.year = {{year}} + AND ocp.month = {{month}} + {% else -%} + SELECT DISTINCT ocp.node, + ocp.source + FROM {{schema | sqlsafe}}.openshift_pod_usage_line_items_daily AS ocp + INNER JOIN public.api_provider as provider + on ocp.source = provider.uuid::varchar + WHERE ocp.interval_start >= {{start_date}} + AND ocp.interval_start < {{end_date}} + INTERVAL '1 day' + AND ocp.node IS NOT NULL + AND ocp.node != '' + AND ocp.year = {{year}} + AND ocp.month = {{month}} + AND provider.type = 'OCP' + and provider.infrastructure_id IS NULL + {% endif -%} +) +SELECT DISTINCT ocp.source as ocp_uuid, + azure.source as infra_uuid, + api_provider.type as type +FROM cte_azure_instances AS azure +JOIN cte_ocp_nodes AS ocp + ON ocp.node = azure.instance +JOIN {{schema | sqlsafe}}.reporting_tenant_api_provider as api_provider + ON azure.source = api_provider.uuid::varchar + +{% if azure_provider_uuid -%} +UNION + +SELECT uuid::varchar, + {{azure_provider_uuid}}, + infra_uuid.infrastructure_type +FROM public.api_provider AS provider_union +JOIN public.api_providerinfrastructuremap AS infra_uuid + ON provider_union.infrastructure_id = infra_uuid.id +WHERE infrastructure_provider_id::varchar = {{azure_provider_uuid}} +{% endif -%} diff --git a/koku/masu/processor/azure/azure_report_parquet_processor.py b/koku/masu/processor/azure/azure_report_parquet_processor.py index 104c4c5bb0..4f531f5d9b 100755 --- a/koku/masu/processor/azure/azure_report_parquet_processor.py +++ b/koku/masu/processor/azure/azure_report_parquet_processor.py @@ -13,10 +13,18 @@ from reporting.provider.azure.models import AzureCostEntryLineItemDailySummary from reporting.provider.azure.models import TRINO_LINE_ITEM_TABLE from reporting.provider.azure.models import TRINO_OCP_ON_AZURE_DAILY_TABLE +from reporting.provider.azure.self_hosted_models import SELF_HOSTED_DAILY_MODEL_MAP +from reporting.provider.azure.self_hosted_models import 
SELF_HOSTED_MODEL_MAP class AzureReportParquetProcessor(ReportParquetProcessorBase): def __init__(self, manifest_id, account, s3_path, provider_uuid, start_date): + # Substring test: any s3_path containing "daily" is treated as daily data + self._is_daily = "daily" in s3_path + + # Date column for deriving usage_start (Azure uses date) + self._date_column = "date" + numeric_columns = [ "quantity", "resourcerate", @@ -51,6 +59,26 @@ def postgres_summary_table(self): """Return the mode for the source specific summary table.""" return AzureCostEntryLineItemDailySummary + @property + def self_hosted_line_item_model(self): + """Return the azure_line_items model from the daily or raw self-hosted map, per _is_daily.""" + # Raw and daily share one table key; only the map consulted differs + table_key = "azure_line_items" + if self._is_daily: + return SELF_HOSTED_DAILY_MODEL_MAP.get(table_key) + else: + return SELF_HOSTED_MODEL_MAP.get(table_key) + + def get_table_names_for_delete(self): + """Return all Azure table names (raw/daily is same table, ocp_on_azure).""" + from masu.util.aws.common import get_table_names_for_delete + + return get_table_names_for_delete("Azure") + + def _prepare_dataframe_for_write(self, data_frame, metadata): + """Set data_frame["manifestid"] in place to str(self._manifest_id); metadata is unused here.""" + data_frame["manifestid"] = str(self._manifest_id) + def create_bill(self, bill_date): """Create bill postgres entry.""" if isinstance(bill_date, str): diff --git a/koku/masu/test/database/test_azure_report_db_accessor.py b/koku/masu/test/database/test_azure_report_db_accessor.py index 1dd0992d95..1e0c2b8fec 100644 --- a/koku/masu/test/database/test_azure_report_db_accessor.py +++ b/koku/masu/test/database/test_azure_report_db_accessor.py @@ -432,3 +432,35 @@ def test_get_matched_tags_strings_trino_disabled(self, mock_postgres_tags, mock_ 1, self.azure_provider_uuid, self.ocp_provider_uuid, "2022-04-01", "2022-04-10" ) self.assertEqual([], result) + + @patch("reporting.provider.azure.self_hosted_models.get_self_hosted_models") + def 
test_delete_self_hosted_data_by_source(self, mock_get_models): + """Test delete_self_hosted_data_by_source deletes data for provider.""" + from unittest.mock import MagicMock + + # Create mock model + mock_model = MagicMock() + mock_model._meta.db_table = "azure_line_items" + mock_model.objects.filter.return_value.delete.return_value = (5, {}) + + mock_get_models.return_value = [mock_model] + + total_deleted = self.accessor.delete_self_hosted_data_by_source(self.azure_provider_uuid) + + self.assertEqual(total_deleted, 5) + mock_model.objects.filter.assert_called_once_with(source=str(self.azure_provider_uuid)) + + @patch("reporting.provider.azure.self_hosted_models.get_self_hosted_models") + def test_delete_self_hosted_data_by_source_no_data(self, mock_get_models): + """Test delete_self_hosted_data_by_source returns 0 when no data.""" + from unittest.mock import MagicMock + + mock_model = MagicMock() + mock_model._meta.db_table = "azure_line_items" + mock_model.objects.filter.return_value.delete.return_value = (0, {}) + + mock_get_models.return_value = [mock_model] + + total_deleted = self.accessor.delete_self_hosted_data_by_source(self.azure_provider_uuid) + + self.assertEqual(total_deleted, 0) diff --git a/koku/masu/test/processor/azure/test_azure_report_parquet_processor.py b/koku/masu/test/processor/azure/test_azure_report_parquet_processor.py index 8fa766fe22..ba4ec5e8e4 100644 --- a/koku/masu/test/processor/azure/test_azure_report_parquet_processor.py +++ b/koku/masu/test/processor/azure/test_azure_report_parquet_processor.py @@ -4,7 +4,9 @@ # """Test the AzureReportParquetProcessor.""" from datetime import date +from unittest.mock import patch +import pandas as pd from django_tenants.utils import schema_context from api.utils import DateHelper @@ -14,6 +16,8 @@ from reporting.provider.azure.models import AzureCostEntryLineItemDailySummary from reporting.provider.azure.models import TRINO_LINE_ITEM_TABLE from reporting.provider.azure.models import 
TRINO_OCP_ON_AZURE_DAILY_TABLE +from reporting.provider.azure.self_hosted_models import SELF_HOSTED_DAILY_MODEL_MAP +from reporting.provider.azure.self_hosted_models import SELF_HOSTED_MODEL_MAP class AzureReportParquetProcessorTest(MasuTestCase): @@ -77,3 +81,90 @@ def test_create_bill_with_string_arg(self): provider=self.azure_provider_uuid, ) self.assertIsNotNone(bill.first()) + + def test_is_daily_flag(self): + """Test that _is_daily is set correctly based on s3_path.""" + # Non-daily path + processor = AzureReportParquetProcessor( + self.manifest_id, self.account, "/s3/path", self.provider_uuid, self.start_date + ) + self.assertFalse(processor._is_daily) + + # Daily path + processor_daily = AzureReportParquetProcessor( + self.manifest_id, self.account, "/s3/path/daily", self.provider_uuid, self.start_date + ) + self.assertTrue(processor_daily._is_daily) + + def test_self_hosted_line_item_model(self): + """Test that self_hosted_line_item_model returns correct model.""" + # Non-daily processor + processor = AzureReportParquetProcessor( + self.manifest_id, self.account, "/s3/path", self.provider_uuid, self.start_date + ) + self.assertEqual(processor.self_hosted_line_item_model, SELF_HOSTED_MODEL_MAP.get("azure_line_items")) + + # Daily processor (Azure uses same table for raw and daily) + processor_daily = AzureReportParquetProcessor( + self.manifest_id, self.account, "/s3/path/daily", self.provider_uuid, self.start_date + ) + self.assertEqual( + processor_daily.self_hosted_line_item_model, SELF_HOSTED_DAILY_MODEL_MAP.get("azure_line_items") + ) + + def test_get_table_names_for_delete(self): + """Test that all Azure table names are returned.""" + table_names = self.processor.get_table_names_for_delete() + self.assertEqual(len(table_names), 2) + self.assertIn(TRINO_LINE_ITEM_TABLE, table_names) + self.assertIn(TRINO_OCP_ON_AZURE_DAILY_TABLE, table_names) + + def test_prepare_dataframe_for_write(self): + """Test that manifestid is added to dataframe.""" + 
data_frame = pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + metadata = {} + self.processor._prepare_dataframe_for_write(data_frame, metadata) + self.assertIn("manifestid", data_frame.columns) + self.assertEqual(data_frame["manifestid"].iloc[0], str(self.manifest_id)) + + @patch("masu.processor.report_parquet_processor_base.get_report_db_accessor") + @patch( + "masu.processor.azure.azure_report_parquet_processor." + "AzureReportParquetProcessor.get_or_create_postgres_partition" + ) + def test_write_to_self_hosted_table(self, mock_partition, mock_get_accessor): + """Test write_to_self_hosted_table writes data correctly.""" + # Create a daily processor (has self_hosted_line_item_model) + processor = AzureReportParquetProcessor( + self.manifest_id, self.account, "/s3/path/daily", self.provider_uuid, self.start_date + ) + + data_frame = pd.DataFrame({"col1": [1, 2], "date": pd.to_datetime(["2024-01-15", "2024-01-15"])}) + metadata = {} + + with patch("pandas.DataFrame.to_sql") as mock_to_sql: + processor.write_to_self_hosted_table(data_frame, metadata) + + # Verify columns were added + self.assertIn("manifestid", data_frame.columns) + self.assertIn("year", data_frame.columns) + self.assertIn("month", data_frame.columns) + self.assertIn("source", data_frame.columns) + self.assertIn("usage_start", data_frame.columns) + self.assertIn("id", data_frame.columns) + + # Verify partition was created + mock_partition.assert_called_once() + + # Verify to_sql was called + mock_to_sql.assert_called_once() + + @patch("masu.processor.report_parquet_processor_base.get_report_db_accessor") + def test_delete_old_data_postgres(self, mock_get_accessor): + """Test delete_old_data_postgres.""" + mock_conn = mock_get_accessor.return_value.connect.return_value.__enter__.return_value + mock_cursor = mock_conn.cursor.return_value.__enter__.return_value + mock_cursor.fetchone.return_value = None # Table doesn't exist + mock_cursor.rowcount = 0 + + 
self.processor.delete_old_data_postgres(self.start_date) diff --git a/koku/reporting/migrations/0352_azurelineitem_managedazureopenshiftdaily_and_more.py b/koku/reporting/migrations/0352_azurelineitem_managedazureopenshiftdaily_and_more.py new file mode 100644 index 0000000000..5daed51f4f --- /dev/null +++ b/koku/reporting/migrations/0352_azurelineitem_managedazureopenshiftdaily_and_more.py @@ -0,0 +1,348 @@ +# Generated by Django 5.2.11 on 2026-05-11 05:55 +import uuid + +from django.db import migrations +from django.db import models + +from koku.database import set_pg_extended_mode +from koku.database import unset_pg_extended_mode + + +class Migration(migrations.Migration): + + dependencies = [ + ("reporting", "0351_awslineitem_awslineitemdaily_and_more"), + ] + + operations = [ + migrations.RunPython(code=set_pg_extended_mode, reverse_code=unset_pg_extended_mode), + migrations.CreateModel( + name="AzureLineItem", + fields=[ + ( + "id", + models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False), + ), + ("usage_start", models.DateField(db_index=True, null=True)), + ("source", models.CharField(db_index=True, max_length=64, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("manifestid", models.CharField(max_length=256, null=True)), + ("billingperiodstartdate", models.DateTimeField(null=True)), + ("billingperiodenddate", models.DateTimeField(null=True)), + ("billingaccountid", models.CharField(max_length=256, null=True)), + ("billingaccountname", models.CharField(max_length=256, null=True)), + ("billingprofileid", models.CharField(max_length=256, null=True)), + ("billingprofilename", models.CharField(max_length=256, null=True)), + ("billingcurrencycode", models.CharField(max_length=20, null=True)), + ("billingcurrency", models.CharField(max_length=20, null=True)), + ("date", models.DateTimeField(db_index=True, null=True)), + ("quantity", models.FloatField(null=True)), + 
("unitofmeasure", models.CharField(max_length=256, null=True)), + ("unitprice", models.FloatField(null=True)), + ("costinbillingcurrency", models.FloatField(null=True)), + ("effectiveprice", models.FloatField(null=True)), + ("paygprice", models.FloatField(null=True)), + ("resourcerate", models.FloatField(null=True)), + ("subscriptionguid", models.CharField(max_length=256, null=True)), + ("subscriptionid", models.CharField(max_length=256, null=True)), + ("subscriptionname", models.CharField(max_length=256, null=True)), + ("resourcegroup", models.CharField(max_length=256, null=True)), + ("resourceid", models.TextField(null=True)), + ("resourcelocation", models.CharField(max_length=256, null=True)), + ("resourcename", models.CharField(max_length=256, null=True)), + ("resourcetype", models.CharField(max_length=256, null=True)), + ("servicename", models.CharField(max_length=256, null=True)), + ("servicefamily", models.CharField(max_length=256, null=True)), + ("servicetier", models.CharField(max_length=256, null=True)), + ("serviceinfo1", models.TextField(null=True)), + ("serviceinfo2", models.TextField(null=True)), + ("consumedservice", models.CharField(max_length=256, null=True)), + ("metercategory", models.CharField(max_length=256, null=True)), + ("meterid", models.CharField(max_length=256, null=True)), + ("metername", models.CharField(max_length=256, null=True)), + ("meterregion", models.CharField(max_length=256, null=True)), + ("metersubcategory", models.CharField(max_length=256, null=True)), + ("productname", models.CharField(max_length=256, null=True)), + ("productorderid", models.CharField(max_length=256, null=True)), + ("productordername", models.CharField(max_length=256, null=True)), + ("accountname", models.CharField(max_length=256, null=True)), + ("accountownerid", models.CharField(max_length=256, null=True)), + ("additionalinfo", models.TextField(null=True)), + ("availabilityzone", models.CharField(max_length=256, null=True)), + ("chargetype", 
models.CharField(max_length=256, null=True)), + ("costcenter", models.CharField(max_length=256, null=True)), + ("frequency", models.CharField(max_length=256, null=True)), + ("invoicesectionid", models.CharField(max_length=256, null=True)), + ("invoicesectionname", models.CharField(max_length=256, null=True)), + ("isazurecrediteligible", models.CharField(max_length=10, null=True)), + ("offerid", models.CharField(max_length=256, null=True)), + ("partnumber", models.CharField(max_length=256, null=True)), + ("planname", models.CharField(max_length=256, null=True)), + ("pricingmodel", models.CharField(max_length=256, null=True)), + ("publishername", models.CharField(max_length=256, null=True)), + ("publishertype", models.CharField(max_length=256, null=True)), + ("reservationid", models.CharField(max_length=256, null=True)), + ("reservationname", models.CharField(max_length=256, null=True)), + ("term", models.CharField(max_length=256, null=True)), + ("tags", models.TextField(null=True)), + ("invoiceid", models.TextField(null=True)), + ("previousinvoiceid", models.TextField(null=True)), + ("resellername", models.TextField(null=True)), + ("resellermpnid", models.TextField(null=True)), + ("costinpricingcurrency", models.TextField(null=True)), + ("costinusd", models.TextField(null=True)), + ("marketprice", models.TextField(null=True)), + ("paygcostinbillingcurrency", models.TextField(null=True)), + ("paygcostinusd", models.TextField(null=True)), + ("pricingcurrency", models.TextField(null=True)), + ("pricingcurrencycode", models.TextField(null=True)), + ("exchangerate", models.TextField(null=True)), + ("exchangeratedate", models.TextField(null=True)), + ("exchangeratepricingtobilling", models.TextField(null=True)), + ("productid", models.TextField(null=True)), + ("product", models.TextField(null=True)), + ("publisherid", models.TextField(null=True)), + ("instancename", models.TextField(null=True)), + ("location", models.TextField(null=True)), + ("serviceperiodstartdate", 
models.TextField(null=True)), + ("serviceperiodenddate", models.TextField(null=True)), + ("row_uuid", models.TextField(null=True)), + ], + options={ + "db_table": "azure_line_items", + "indexes": [ + models.Index( + fields=["source", "year", "month"], + name="azure_li_src_yr_mo_idx", + ) + ], + }, + ), + migrations.CreateModel( + name="ManagedAzureOpenShiftDaily", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("row_uuid", models.CharField(max_length=256, null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("resource_id", models.CharField(max_length=256, null=True)), + ("service_name", models.CharField(max_length=256, null=True)), + ( + "data_transfer_direction", + models.CharField(max_length=256, null=True), + ), + ("instance_type", models.CharField(max_length=256, null=True)), + ("subscription_guid", models.CharField(max_length=256, null=True)), + ("subscription_name", models.CharField(max_length=256, null=True)), + ("resource_location", models.CharField(max_length=256, null=True)), + ("unit_of_measure", models.CharField(max_length=256, null=True)), + ("usage_quantity", models.FloatField(null=True)), + ("currency", models.CharField(max_length=256, null=True)), + ("pretax_cost", models.FloatField(null=True)), + ("date", models.DateTimeField(null=True)), + ("metername", models.CharField(max_length=256, null=True)), + ("complete_resource_id", models.CharField(max_length=256, null=True)), + ("tags", models.TextField(null=True)), + ("resource_id_matched", models.BooleanField(null=True)), + ("matched_tag", models.CharField(max_length=256, null=True)), + ("source", models.CharField(max_length=256, null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("day", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": 
"managed_azure_openshift_daily_temp", + "indexes": [ + models.Index( + fields=["source", "ocp_source", "year", "month"], + name="azure_daily_tmp_src_yr_mo_idx", + ), + models.Index(fields=["day"], name="azure_daily_tmp_day_idx"), + ], + }, + ), + migrations.CreateModel( + name="ManagedAzureOpenShiftDiskCapacities", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("resource_id", models.CharField(max_length=256, null=True)), + ("capacity", models.IntegerField(null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": "managed_azure_openshift_disk_capacities_temp", + "indexes": [ + models.Index( + fields=["ocp_source", "year", "month"], + name="azure_disk_cap_src_yr_mo_idx", + ) + ], + }, + ), + migrations.CreateModel( + name="ManagedOCPAzureCostLineItemProjectDailySummary", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("row_uuid", models.CharField(max_length=256, null=True)), + ("cluster_id", models.CharField(max_length=256, null=True)), + ("cluster_alias", models.CharField(max_length=256, null=True)), + ("data_source", models.CharField(max_length=256, null=True)), + ("namespace", models.CharField(max_length=256, null=True)), + ("node", models.CharField(max_length=256, null=True)), + ("persistentvolumeclaim", models.CharField(max_length=256, null=True)), + ("persistentvolume", models.CharField(max_length=256, null=True)), + ("storageclass", models.CharField(max_length=256, null=True)), + ("resource_id", models.CharField(max_length=256, null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("usage_end", models.DateTimeField(null=True)), + ("service_name", 
models.CharField(max_length=256, null=True)), + ( + "data_transfer_direction", + models.CharField(max_length=256, null=True), + ), + ("instance_type", models.CharField(max_length=256, null=True)), + ("subscription_guid", models.CharField(max_length=256, null=True)), + ("subscription_name", models.CharField(max_length=256, null=True)), + ("resource_location", models.CharField(max_length=256, null=True)), + ("unit_of_measure", models.CharField(max_length=256, null=True)), + ("usage_quantity", models.FloatField(null=True)), + ("currency", models.CharField(max_length=256, null=True)), + ("pretax_cost", models.FloatField(null=True)), + ("markup_cost", models.FloatField(null=True)), + ("pod_cost", models.FloatField(null=True)), + ("project_markup_cost", models.FloatField(null=True)), + ("pod_effective_usage_cpu_core_hours", models.FloatField(null=True)), + ( + "pod_effective_usage_memory_gigabyte_hours", + models.FloatField(null=True), + ), + ("node_capacity_cpu_core_hours", models.FloatField(null=True)), + ("node_capacity_memory_gigabyte_hours", models.FloatField(null=True)), + ("pod_labels", models.TextField(null=True)), + ("volume_labels", models.TextField(null=True)), + ("tags", models.TextField(null=True)), + ("project_rank", models.IntegerField(null=True)), + ("data_source_rank", models.IntegerField(null=True)), + ("matched_tag", models.CharField(max_length=256, null=True)), + ("resource_id_matched", models.BooleanField(null=True)), + ("cost_category_id", models.IntegerField(null=True)), + ("source", models.CharField(max_length=256, null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("day", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": "managed_reporting_ocpazurecostlineitem_project_daily_summary", + "indexes": [ + models.Index( + fields=["source", "ocp_source", "year", "month"], + 
name="azure_summ_src_yr_mo_idx", + ), + models.Index(fields=["day"], name="azure_summ_day_idx"), + models.Index(fields=["usage_start"], name="azure_summ_usage_start_idx"), + ], + }, + ), + migrations.CreateModel( + name="ManagedOCPAzureCostLineItemProjectDailySummaryTemp", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("row_uuid", models.CharField(max_length=256, null=True)), + ("cluster_id", models.CharField(max_length=256, null=True)), + ("cluster_alias", models.CharField(max_length=256, null=True)), + ("data_source", models.CharField(max_length=256, null=True)), + ("namespace", models.CharField(max_length=256, null=True)), + ("node", models.CharField(max_length=256, null=True)), + ("persistentvolumeclaim", models.CharField(max_length=256, null=True)), + ("persistentvolume", models.CharField(max_length=256, null=True)), + ("storageclass", models.CharField(max_length=256, null=True)), + ("resource_id", models.CharField(max_length=256, null=True)), + ("usage_start", models.DateTimeField(null=True)), + ("usage_end", models.DateTimeField(null=True)), + ("service_name", models.CharField(max_length=256, null=True)), + ( + "data_transfer_direction", + models.CharField(max_length=256, null=True), + ), + ("instance_type", models.CharField(max_length=256, null=True)), + ("subscription_guid", models.CharField(max_length=256, null=True)), + ("subscription_name", models.CharField(max_length=256, null=True)), + ("resource_location", models.CharField(max_length=256, null=True)), + ("unit_of_measure", models.CharField(max_length=256, null=True)), + ("usage_quantity", models.FloatField(null=True)), + ("currency", models.CharField(max_length=256, null=True)), + ("pretax_cost", models.FloatField(null=True)), + ("markup_cost", models.FloatField(null=True)), + ("pod_cost", models.FloatField(null=True)), + ("project_markup_cost", models.FloatField(null=True)), + 
("pod_effective_usage_cpu_core_hours", models.FloatField(null=True)), + ( + "pod_effective_usage_memory_gigabyte_hours", + models.FloatField(null=True), + ), + ("node_capacity_cpu_core_hours", models.FloatField(null=True)), + ("node_capacity_memory_gigabyte_hours", models.FloatField(null=True)), + ("pod_labels", models.TextField(null=True)), + ("volume_labels", models.TextField(null=True)), + ("tags", models.TextField(null=True)), + ("project_rank", models.IntegerField(null=True)), + ("data_source_rank", models.IntegerField(null=True)), + ("matched_tag", models.CharField(max_length=256, null=True)), + ("resource_id_matched", models.BooleanField(null=True)), + ("cost_category_id", models.IntegerField(null=True)), + ("source", models.CharField(max_length=256, null=True)), + ("ocp_source", models.CharField(max_length=256, null=True)), + ("year", models.CharField(max_length=4, null=True)), + ("month", models.CharField(max_length=2, null=True)), + ("day", models.CharField(max_length=2, null=True)), + ], + options={ + "db_table": "managed_reporting_ocpazurecostlineitem_project_daily_summary_temp", + "indexes": [ + models.Index( + fields=["source", "ocp_source", "year", "month"], + name="azure_summ_tmp_src_yr_mo_idx", + ), + models.Index(fields=["day"], name="azure_summ_tmp_day_idx"), + ], + }, + ), + migrations.RunPython(code=unset_pg_extended_mode, reverse_code=set_pg_extended_mode), + ] diff --git a/koku/reporting/provider/azure/models.py b/koku/reporting/provider/azure/models.py index 12c7b02357..1b01c64df0 100644 --- a/koku/reporting/provider/azure/models.py +++ b/koku/reporting/provider/azure/models.py @@ -466,3 +466,16 @@ class Meta: "reporting.TenantAPIProvider", on_delete=models.CASCADE, unique=False, null=True, db_column="source_uuid" ) subscription_name = models.TextField(null=True) + + +from reporting.provider.azure.self_hosted_models import AzureLineItem # noqa: E402, F401 +from reporting.provider.azure.openshift.self_hosted_models import 
ManagedAzureOpenShiftDaily # noqa: E402, F401 +from reporting.provider.azure.openshift.self_hosted_models import ( # noqa: E402, F401 + ManagedAzureOpenShiftDiskCapacities, +) +from reporting.provider.azure.openshift.self_hosted_models import ( # noqa: E402, F401 + ManagedOCPAzureCostLineItemProjectDailySummary, +) +from reporting.provider.azure.openshift.self_hosted_models import ( # noqa: E402, F401 + ManagedOCPAzureCostLineItemProjectDailySummaryTemp, +) diff --git a/koku/reporting/provider/azure/openshift/self_hosted_models.py b/koku/reporting/provider/azure/openshift/self_hosted_models.py new file mode 100644 index 0000000000..b9b58d5a2f --- /dev/null +++ b/koku/reporting/provider/azure/openshift/self_hosted_models.py @@ -0,0 +1,160 @@ +# +# Copyright 2025 Red Hat Inc. +# SPDX-License-Identifier: Apache-2.0 +# +"""Django models for Azure OpenShift managed tables (on-prem PostgreSQL).""" +from django.db import models + + +class ManagedAzureOpenShiftDaily(models.Model): + class Meta: + db_table = "managed_azure_openshift_daily_temp" + indexes = [ + models.Index(fields=["source", "ocp_source", "year", "month"], name="azure_daily_tmp_src_yr_mo_idx"), + models.Index(fields=["day"], name="azure_daily_tmp_day_idx"), + ] + + row_uuid = models.CharField(max_length=256, null=True) + usage_start = models.DateTimeField(null=True) + resource_id = models.CharField(max_length=256, null=True) + service_name = models.CharField(max_length=256, null=True) + data_transfer_direction = models.CharField(max_length=256, null=True) + instance_type = models.CharField(max_length=256, null=True) + subscription_guid = models.CharField(max_length=256, null=True) + subscription_name = models.CharField(max_length=256, null=True) + resource_location = models.CharField(max_length=256, null=True) + unit_of_measure = models.CharField(max_length=256, null=True) + usage_quantity = models.FloatField(null=True) + currency = models.CharField(max_length=256, null=True) + pretax_cost = 
models.FloatField(null=True) + date = models.DateTimeField(null=True) + metername = models.CharField(max_length=256, null=True) + complete_resource_id = models.CharField(max_length=256, null=True) + tags = models.TextField(null=True) + resource_id_matched = models.BooleanField(null=True) + matched_tag = models.CharField(max_length=256, null=True) + source = models.CharField(max_length=256, null=True) + ocp_source = models.CharField(max_length=256, null=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + day = models.CharField(max_length=2, null=True) + + +class ManagedOCPAzureCostLineItemProjectDailySummaryTemp(models.Model): + class Meta: + db_table = "managed_reporting_ocpazurecostlineitem_project_daily_summary_temp" + indexes = [ + models.Index(fields=["source", "ocp_source", "year", "month"], name="azure_summ_tmp_src_yr_mo_idx"), + models.Index(fields=["day"], name="azure_summ_tmp_day_idx"), + ] + + row_uuid = models.CharField(max_length=256, null=True) + cluster_id = models.CharField(max_length=256, null=True) + cluster_alias = models.CharField(max_length=256, null=True) + data_source = models.CharField(max_length=256, null=True) + namespace = models.CharField(max_length=256, null=True) + node = models.CharField(max_length=256, null=True) + persistentvolumeclaim = models.CharField(max_length=256, null=True) + persistentvolume = models.CharField(max_length=256, null=True) + storageclass = models.CharField(max_length=256, null=True) + resource_id = models.CharField(max_length=256, null=True) + usage_start = models.DateTimeField(null=True) + usage_end = models.DateTimeField(null=True) + service_name = models.CharField(max_length=256, null=True) + data_transfer_direction = models.CharField(max_length=256, null=True) + instance_type = models.CharField(max_length=256, null=True) + subscription_guid = models.CharField(max_length=256, null=True) + subscription_name = models.CharField(max_length=256, null=True) 
+ resource_location = models.CharField(max_length=256, null=True) + unit_of_measure = models.CharField(max_length=256, null=True) + usage_quantity = models.FloatField(null=True) + currency = models.CharField(max_length=256, null=True) + pretax_cost = models.FloatField(null=True) + markup_cost = models.FloatField(null=True) + pod_cost = models.FloatField(null=True) + project_markup_cost = models.FloatField(null=True) + pod_effective_usage_cpu_core_hours = models.FloatField(null=True) + pod_effective_usage_memory_gigabyte_hours = models.FloatField(null=True) + node_capacity_cpu_core_hours = models.FloatField(null=True) + node_capacity_memory_gigabyte_hours = models.FloatField(null=True) + pod_labels = models.TextField(null=True) + volume_labels = models.TextField(null=True) + tags = models.TextField(null=True) + project_rank = models.IntegerField(null=True) + data_source_rank = models.IntegerField(null=True) + matched_tag = models.CharField(max_length=256, null=True) + resource_id_matched = models.BooleanField(null=True) + cost_category_id = models.IntegerField(null=True) + source = models.CharField(max_length=256, null=True) + ocp_source = models.CharField(max_length=256, null=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + day = models.CharField(max_length=2, null=True) + + +class ManagedOCPAzureCostLineItemProjectDailySummary(models.Model): + class Meta: + db_table = "managed_reporting_ocpazurecostlineitem_project_daily_summary" + indexes = [ + models.Index(fields=["source", "ocp_source", "year", "month"], name="azure_summ_src_yr_mo_idx"), + models.Index(fields=["day"], name="azure_summ_day_idx"), + models.Index(fields=["usage_start"], name="azure_summ_usage_start_idx"), + ] + + row_uuid = models.CharField(max_length=256, null=True) + cluster_id = models.CharField(max_length=256, null=True) + cluster_alias = models.CharField(max_length=256, null=True) + data_source = models.CharField(max_length=256, 
null=True) + namespace = models.CharField(max_length=256, null=True) + node = models.CharField(max_length=256, null=True) + persistentvolumeclaim = models.CharField(max_length=256, null=True) + persistentvolume = models.CharField(max_length=256, null=True) + storageclass = models.CharField(max_length=256, null=True) + resource_id = models.CharField(max_length=256, null=True) + usage_start = models.DateTimeField(null=True) + usage_end = models.DateTimeField(null=True) + service_name = models.CharField(max_length=256, null=True) + data_transfer_direction = models.CharField(max_length=256, null=True) + instance_type = models.CharField(max_length=256, null=True) + subscription_guid = models.CharField(max_length=256, null=True) + subscription_name = models.CharField(max_length=256, null=True) + resource_location = models.CharField(max_length=256, null=True) + unit_of_measure = models.CharField(max_length=256, null=True) + usage_quantity = models.FloatField(null=True) + currency = models.CharField(max_length=256, null=True) + pretax_cost = models.FloatField(null=True) + markup_cost = models.FloatField(null=True) + pod_cost = models.FloatField(null=True) + project_markup_cost = models.FloatField(null=True) + pod_effective_usage_cpu_core_hours = models.FloatField(null=True) + pod_effective_usage_memory_gigabyte_hours = models.FloatField(null=True) + node_capacity_cpu_core_hours = models.FloatField(null=True) + node_capacity_memory_gigabyte_hours = models.FloatField(null=True) + pod_labels = models.TextField(null=True) + volume_labels = models.TextField(null=True) + tags = models.TextField(null=True) + project_rank = models.IntegerField(null=True) + data_source_rank = models.IntegerField(null=True) + matched_tag = models.CharField(max_length=256, null=True) + resource_id_matched = models.BooleanField(null=True) + cost_category_id = models.IntegerField(null=True) + source = models.CharField(max_length=256, null=True) + ocp_source = models.CharField(max_length=256, null=True) 
+ year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + day = models.CharField(max_length=2, null=True) + + +class ManagedAzureOpenShiftDiskCapacities(models.Model): + class Meta: + db_table = "managed_azure_openshift_disk_capacities_temp" + indexes = [ + models.Index(fields=["ocp_source", "year", "month"], name="azure_disk_cap_src_yr_mo_idx"), + ] + + resource_id = models.CharField(max_length=256, null=True) + capacity = models.IntegerField(null=True) + usage_start = models.DateTimeField(null=True) + ocp_source = models.CharField(max_length=256, null=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) diff --git a/koku/reporting/provider/azure/self_hosted_models.py b/koku/reporting/provider/azure/self_hosted_models.py new file mode 100644 index 0000000000..0716dda7c7 --- /dev/null +++ b/koku/reporting/provider/azure/self_hosted_models.py @@ -0,0 +1,181 @@ +# +# Copyright 2025 Red Hat Inc. +# SPDX-License-Identifier: Apache-2.0 +# +"""Django models for Azure line item tables (on-prem PostgreSQL storage).""" +from uuid import uuid4 + +from django.db import models + + +class AzureLineItem(models.Model): + """Model for azure_line_items table. + + Azure uses a single table for both raw and daily data (no separate daily table). + This model replaces the raw SQL table creation for on-prem PostgreSQL storage. 
+ """ + + class Meta: + db_table = "azure_line_items" + indexes = [ + models.Index(fields=["source", "year", "month"], name="azure_li_src_yr_mo_idx"), + ] + + class PartitionInfo: + partition_type = "RANGE" + partition_cols = ["usage_start"] + + # UUID pk for SQLAlchemy to_sql / partitioned tables; NOTE(review): uuid4 default is Django-side only + id = models.UUIDField(primary_key=True, default=uuid4) + + # Partition column (PartitionInfo.partition_cols) - DateField for proper range partitioning + usage_start = models.DateField(null=True, db_index=True) + + # Partition-related columns; NOTE(review): source has db_index and also leads azure_li_src_yr_mo_idx + source = models.CharField(max_length=64, null=True, db_index=True) + year = models.CharField(max_length=4, null=True) + month = models.CharField(max_length=2, null=True) + + # Manifest tracking + manifestid = models.CharField(max_length=256, null=True) + + # Azure billing columns + billingperiodstartdate = models.DateTimeField(null=True) + billingperiodenddate = models.DateTimeField(null=True) + billingaccountid = models.CharField(max_length=256, null=True) + billingaccountname = models.CharField(max_length=256, null=True) + billingprofileid = models.CharField(max_length=256, null=True) + billingprofilename = models.CharField(max_length=256, null=True) + billingcurrencycode = models.CharField(max_length=20, null=True) + billingcurrency = models.CharField(max_length=20, null=True) + + # Date and usage columns + date = models.DateTimeField(null=True, db_index=True) + quantity = models.FloatField(null=True) + unitofmeasure = models.CharField(max_length=256, null=True) + unitprice = models.FloatField(null=True) + + # Cost columns + costinbillingcurrency = models.FloatField(null=True) + effectiveprice = models.FloatField(null=True) + paygprice = models.FloatField(null=True) + resourcerate = models.FloatField(null=True) + + # Subscription columns + subscriptionguid = models.CharField(max_length=256, null=True) + subscriptionid = models.CharField(max_length=256, null=True) + subscriptionname = models.CharField(max_length=256, null=True) +
+ # Resource columns + resourcegroup = models.CharField(max_length=256, null=True) + resourceid = models.TextField(null=True) + resourcelocation = models.CharField(max_length=256, null=True) + resourcename = models.CharField(max_length=256, null=True) + resourcetype = models.CharField(max_length=256, null=True) + + # Service columns + servicename = models.CharField(max_length=256, null=True) + servicefamily = models.CharField(max_length=256, null=True) + servicetier = models.CharField(max_length=256, null=True) + serviceinfo1 = models.TextField(null=True) + serviceinfo2 = models.TextField(null=True) + consumedservice = models.CharField(max_length=256, null=True) + + # Meter columns + metercategory = models.CharField(max_length=256, null=True) + meterid = models.CharField(max_length=256, null=True) + metername = models.CharField(max_length=256, null=True) + meterregion = models.CharField(max_length=256, null=True) + metersubcategory = models.CharField(max_length=256, null=True) + + # Product columns + productname = models.CharField(max_length=256, null=True) + productorderid = models.CharField(max_length=256, null=True) + productordername = models.CharField(max_length=256, null=True) + + # Other columns + accountname = models.CharField(max_length=256, null=True) + accountownerid = models.CharField(max_length=256, null=True) + additionalinfo = models.TextField(null=True) + availabilityzone = models.CharField(max_length=256, null=True) + chargetype = models.CharField(max_length=256, null=True) + costcenter = models.CharField(max_length=256, null=True) + frequency = models.CharField(max_length=256, null=True) + invoicesectionid = models.CharField(max_length=256, null=True) + invoicesectionname = models.CharField(max_length=256, null=True) + isazurecrediteligible = models.CharField(max_length=10, null=True) + offerid = models.CharField(max_length=256, null=True) + partnumber = models.CharField(max_length=256, null=True) + planname = models.CharField(max_length=256, 
null=True) + pricingmodel = models.CharField(max_length=256, null=True) + publishername = models.CharField(max_length=256, null=True) + publishertype = models.CharField(max_length=256, null=True) + reservationid = models.CharField(max_length=256, null=True) + reservationname = models.CharField(max_length=256, null=True) + term = models.CharField(max_length=256, null=True) + + # Tags (stored as JSON text) + tags = models.TextField(null=True) + + # Invoice columns + invoiceid = models.TextField(null=True) + previousinvoiceid = models.TextField(null=True) + + # Reseller columns + resellername = models.TextField(null=True) + resellermpnid = models.TextField(null=True) + + # Additional cost columns (TextField here, unlike the FloatField cost columns; NOTE(review): confirm) + costinpricingcurrency = models.TextField(null=True) + costinusd = models.TextField(null=True) + marketprice = models.TextField(null=True) + paygcostinbillingcurrency = models.TextField(null=True) + paygcostinusd = models.TextField(null=True) + pricingcurrency = models.TextField(null=True) + pricingcurrencycode = models.TextField(null=True) + + # Exchange rate columns + exchangerate = models.TextField(null=True) + exchangeratedate = models.TextField(null=True) + exchangeratepricingtobilling = models.TextField(null=True) + + # Additional product columns + productid = models.TextField(null=True) + product = models.TextField(null=True) + publisherid = models.TextField(null=True) + + # Additional resource column + instancename = models.TextField(null=True) + + # Additional location column + location = models.TextField(null=True) + + # Service period columns (stored as text, not DateTimeField; NOTE(review): confirm) + serviceperiodstartdate = models.TextField(null=True) + serviceperiodenddate = models.TextField(null=True) + + # Row UUID for deduplication + row_uuid = models.TextField(null=True) + + +# Azure uses the same table for raw and daily data, so both maps point at AzureLineItem +SELF_HOSTED_MODEL_MAP = { + "azure_line_items": AzureLineItem, +} + +SELF_HOSTED_DAILY_MODEL_MAP = { + "azure_line_items": AzureLineItem, +} + + +def get_self_hosted_models(): + """Get all
self-hosted models. + + Returns a list of all Django models used for self-hosted/on-prem data storage (currently only AzureLineItem). + """ + return [AzureLineItem] + + +def get_self_hosted_table_names(): + """Return the Meta.db_table name of every model from get_self_hosted_models().""" + return [model._meta.db_table for model in get_self_hosted_models()]