diff --git a/koku/api/report/ocp/provider_map.py b/koku/api/report/ocp/provider_map.py index b185d5763d..c58d5b629a 100644 --- a/koku/api/report/ocp/provider_map.py +++ b/koku/api/report/ocp/provider_map.py @@ -22,7 +22,6 @@ from django.db.models.functions import Cast from django.db.models.functions import Coalesce from django.db.models.functions import Concat -from django.db.models.functions import Greatest from django.db.models.functions import JSONObject from django.db.models.functions import Round from django.db.models.functions.comparison import NullIf @@ -194,45 +193,6 @@ def __cost_model_distributed_cost(self, cost_model_rate_type, exchange_rate_colu * Coalesce(exchange_rate_column, Value(1, output_field=DecimalField())), ) - def _cpu_usage_sum(self): - """Return a new Sum expression for CPU usage hours.""" - return Sum(Coalesce(F("pod_usage_cpu_core_hours"), Value(0, output_field=DecimalField()))) - - def _cpu_request_sum(self): - """Return a new Sum expression for CPU request hours.""" - return Sum(Coalesce(F("pod_request_cpu_core_hours"), Value(0, output_field=DecimalField()))) - - def _memory_usage_sum(self): - """Return a new Sum expression for memory usage hours.""" - return Sum(Coalesce(F("pod_usage_memory_gigabyte_hours"), Value(0, output_field=DecimalField()))) - - def _memory_request_sum(self): - """Return a new Sum expression for memory request hours.""" - return Sum(Coalesce(F("pod_request_memory_gigabyte_hours"), Value(0, output_field=DecimalField()))) - - def _efficiency_annotations(self, usage_sum_prop, request_sum_prop, cost_total_expr): - """Build usage_efficiency and wasted_cost annotation expressions.""" - _dec = DecimalField(max_digits=33, decimal_places=15) - return { - "usage_efficiency": Coalesce( - Round(usage_sum_prop / NullIf(request_sum_prop, Value(0, output_field=_dec)) * Value(100)), - Value(0), - output_field=IntegerField(), - ), - "wasted_cost": Coalesce( - Greatest( - cost_total_expr - * ( - Value(1, output_field=_dec) - - 
usage_sum_prop / NullIf(request_sum_prop, Value(0, output_field=_dec)) - ), - Value(0, output_field=_dec), - ), - Value(0, output_field=_dec), - output_field=_dec, - ), - } - def __init__(self, provider, report_type, schema_name): """Constructor.""" self._schema_name = schema_name @@ -441,10 +401,35 @@ def __init__(self, provider, report_type, schema_name): "pod_request_cpu_core_hours", default=Value(0, output_field=DecimalField()) ), "limit": Sum("pod_limit_cpu_core_hours", default=Value(0, output_field=DecimalField())), - **self._efficiency_annotations( - self._cpu_usage_sum(), - self._cpu_request_sum(), - self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_cpu_cost, + "usage_efficiency": Coalesce( + Round( + Sum(Coalesce(F("pod_usage_cpu_core_hours"), Value(0, output_field=DecimalField()))) + / NullIf( + Sum( + Coalesce( + F("pod_request_cpu_core_hours"), Value(0, output_field=DecimalField()) + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Value(100) + ), + Value(0), + output_field=IntegerField(), + ), + "wasted_cost": Coalesce( + Sum( + Coalesce( + F("wasted_cpu_cost"), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Coalesce( + F("exchange_rate"), + Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + output_field=DecimalField(max_digits=33, decimal_places=15), ), }, "capacity_aggregate": { @@ -501,10 +486,38 @@ def __init__(self, provider, report_type, schema_name): "limit": Sum( Coalesce(F("pod_limit_cpu_core_hours"), Value(0, output_field=DecimalField())) ), - **self._efficiency_annotations( - self._cpu_usage_sum(), - self._cpu_request_sum(), - self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_cpu_cost, + "usage_efficiency": Coalesce( + Round( + Sum(Coalesce(F("pod_usage_cpu_core_hours"), Value(0, output_field=DecimalField()))) + / NullIf( + Sum( + 
Coalesce( + F("pod_request_cpu_core_hours"), Value(0, output_field=DecimalField()) + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Value(100) + ), + Value(0), + output_field=IntegerField(), + ), + "wasted_cost": Coalesce( + Sum( + Coalesce( + F("wasted_cpu_cost"), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Coalesce( + F("exchange_rate"), + Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + output_field=DecimalField(max_digits=33, decimal_places=15), + ), + "wasted_cost_units": Coalesce( + "currency_annotation", Value("USD", output_field=CharField()) ), "capacity": Max("cluster_capacity_cpu_core_hours"), # overwritten in capacity aggregation "clusters": ArrayAgg( @@ -598,10 +611,40 @@ def __init__(self, provider, report_type, schema_name): "limit": Sum( "pod_limit_memory_gigabyte_hours", default=Value(0, output_field=DecimalField()) ), - **self._efficiency_annotations( - self._memory_usage_sum(), - self._memory_request_sum(), - self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_memory_cost, + "usage_efficiency": Coalesce( + Round( + Sum( + Coalesce( + F("pod_usage_memory_gigabyte_hours"), Value(0, output_field=DecimalField()) + ) + ) + / NullIf( + Sum( + Coalesce( + F("pod_request_memory_gigabyte_hours"), + Value(0, output_field=DecimalField()), + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Value(100) + ), + Value(0), + output_field=IntegerField(), + ), + "wasted_cost": Coalesce( + Sum( + Coalesce( + F("wasted_memory_cost"), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Coalesce( + F("exchange_rate"), + Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + 
output_field=DecimalField(max_digits=33, decimal_places=15), ), }, "capacity_aggregate": { @@ -659,10 +702,43 @@ def __init__(self, provider, report_type, schema_name): "limit": Sum( Coalesce(F("pod_limit_memory_gigabyte_hours"), Value(0, output_field=DecimalField())) ), - **self._efficiency_annotations( - self._memory_usage_sum(), - self._memory_request_sum(), - self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_memory_cost, + "usage_efficiency": Coalesce( + Round( + Sum( + Coalesce( + F("pod_usage_memory_gigabyte_hours"), Value(0, output_field=DecimalField()) + ) + ) + / NullIf( + Sum( + Coalesce( + F("pod_request_memory_gigabyte_hours"), + Value(0, output_field=DecimalField()), + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Value(100) + ), + Value(0), + output_field=IntegerField(), + ), + "wasted_cost": Coalesce( + Sum( + Coalesce( + F("wasted_memory_cost"), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + * Coalesce( + F("exchange_rate"), + Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)), + ) + ), + Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)), + output_field=DecimalField(max_digits=33, decimal_places=15), + ), + "wasted_cost_units": Coalesce( + "currency_annotation", Value("USD", output_field=CharField()) ), "capacity": Max( "cluster_capacity_memory_gigabyte_hours" diff --git a/koku/api/report/ocp/query_handler.py b/koku/api/report/ocp/query_handler.py index a3d878400a..2793a7cc08 100644 --- a/koku/api/report/ocp/query_handler.py +++ b/koku/api/report/ocp/query_handler.py @@ -112,6 +112,14 @@ def __init__(self, parameters): ocp_pack_definitions["compute"] = {"keys": ["compute"], "units": "mig_compute_units"} ocp_pack_definitions["memory"] = {"keys": ["memory"], "units": "mig_memory_units"} ocp_pack_definitions["gpu_count"] = {"keys": ["gpu_count"], "units": "gpu_count_units"} + ocp_pack_definitions["score_efficiency"] = { + 
"keys": {"usage_efficiency": {"key": "usage_efficiency_percent", "group": "score"}}, + "units": None, + } + ocp_pack_definitions["score_wasted"] = { + "keys": {"wasted_cost": {"key": "wasted_cost", "group": "score"}}, + "units": "wasted_cost_units", + } # super() needs to be called after _mapper and _limit is set super().__init__(parameters) @@ -229,8 +237,7 @@ def _format_query_response(self): output["data"] = self.query_data self.query_sum = self._pack_data_object(self.query_sum, **self._mapper.PACK_DEFINITIONS) - if "score" in self.query_sum: - self.query_sum["total_score"] = self.query_sum.pop("score") + self.query_sum["total_score"] = self.query_sum.pop("score", {}) output["total"] = self.query_sum if self._delta: @@ -238,21 +245,6 @@ def _format_query_response(self): return output - def _pack_score(self, row, should_compute): - """Shape efficiency annotations into the score response object.""" - if should_compute: - row["score"] = { - "usage_efficiency_percent": row.pop("usage_efficiency", 0), - "wasted_cost": { - "value": row.pop("wasted_cost", Decimal(0)), - "units": self.currency, - }, - } - else: - row.pop("usage_efficiency", None) - row.pop("wasted_cost", None) - row["score"] = {} - def execute_query(self): # noqa: C901 """Execute query and return provided data. 
@@ -273,9 +265,8 @@ def execute_query(self): # noqa: C901 query_group_by = ["date"] + group_by_value query_order_by = ["-date", self.order] - query_data = query.values(*query_group_by).annotate( - **{k: v for k, v in self.report_annotations.items() if k not in group_by_value} - ) + row_annotations = {k: v for k, v in self.report_annotations.items() if k not in group_by_value} + query_data = query.values(*query_group_by).annotate(**row_annotations) if is_grouped_by_project(self.parameters): query_data = self._project_classification_annotation(query_data) @@ -303,16 +294,23 @@ def execute_query(self): # noqa: C901 if self._report_type in ("cpu", "memory"): has_tag_interaction = self._tag_group_by or self.get_tag_filter_keys() should_compute = not has_tag_interaction and len(group_by_value) <= 1 - self._pack_score(query_sum, should_compute) + if should_compute: + query_sum["wasted_cost_units"] = self.currency + else: + query_sum.pop("usage_efficiency", None) + query_sum.pop("wasted_cost", None) + query_sum.pop("wasted_cost_units", None) if self._delta: query_data = self.add_deltas(query_data, query_sum) query_data = self.order_by(query_data, query_order_by) - if self._report_type in ("cpu", "memory"): + if self._report_type in ("cpu", "memory") and not should_compute: for row in query_data: - self._pack_score(row, should_compute) + row.pop("usage_efficiency", None) + row.pop("wasted_cost", None) + row.pop("wasted_cost_units", None) for row in query_data: if tag_iterable := row.get("tags"): @@ -324,6 +322,12 @@ def execute_query(self): # noqa: C901 data = [{"date": date_string, "vm_names": query_data}] else: data = list(query_data) + if self._report_type in ("cpu", "memory"): + # Pack score fields so the CSV renderer emits score.usage_efficiency_percent + # and score.wasted_cost.* columns to match the JSON response structure. 
+ score_pack = {k: v for k, v in self._mapper.PACK_DEFINITIONS.items() if k.startswith("score_")} + for row in data: + self._pack_data_object(row, **score_pack) else: # Pass in a copy of the group by without the added # tag column name prefix diff --git a/koku/api/report/test/ocp/test_ocp_provider_map.py b/koku/api/report/test/ocp/test_ocp_provider_map.py index 244a3311d9..78f7551cd1 100644 --- a/koku/api/report/test/ocp/test_ocp_provider_map.py +++ b/koku/api/report/test/ocp/test_ocp_provider_map.py @@ -71,3 +71,158 @@ def test_distributed_costs_use_correct_exchange_rate(self): ).aggregate(val=getattr(mapper, mapper_attr)) self.assertEqual(result["val"], cost_val * expected_multiplier) + + def test_wasted_cost_cpu_reads_precomputed_column_with_exchange_rate(self): + """CPU wasted_cost sums the pre-computed wasted_cpu_cost column and applies exchange_rate. + + The SQL summarization pre-computes per-pod waste before any cross-pod aggregation, + avoiding the pooling problem (over- and under-utilised pods cancelling each other). + The provider map expression simply reads that value and converts to display currency. + """ + cluster_id = "s1a-waste-test-cpu" + usage_date = self.dh.yesterday.date() + _dec = DecimalField(max_digits=33, decimal_places=15) + exchange_rate = Value(Decimal("2"), output_field=_dec) + + with tenant_context(self.tenant): + OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id).delete() + + # Row representing a pod group with pre-computed waste=50 (e.g. 
50% waste on 100 cost) + self.baker.make( + OCPUsageLineItemDailySummary, + cluster_id=cluster_id, + data_source="Pod", + namespace="s1a-test", + usage_start=usage_date, + usage_end=usage_date, + wasted_cpu_cost=Decimal("50"), + ) + # Row representing an over-utilised pod group — waste already clamped to 0 by SQL + self.baker.make( + OCPUsageLineItemDailySummary, + cluster_id=cluster_id, + data_source="Pod", + namespace="s1a-test", + usage_start=usage_date, + usage_end=usage_date, + wasted_cpu_cost=Decimal("0"), + ) + + mapper = OCPProviderMap(provider=Provider.PROVIDER_OCP, report_type="cpu", schema_name=self.schema_name) + aggregates = mapper.report_type_map["aggregates"] + qs = OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id, data_source="Pod").annotate( + exchange_rate=exchange_rate, + infra_exchange_rate=exchange_rate, + ) + result = qs.aggregate(wasted_cost=aggregates["wasted_cost"]) + # SUM(50 + 0) * exchange_rate(2) = 100 + self.assertEqual(result["wasted_cost"], Decimal("100")) + + def test_wasted_cost_memory_reads_precomputed_column_with_exchange_rate(self): + """Memory wasted_cost sums the pre-computed wasted_memory_cost column, mirroring the CPU report.""" + cluster_id = "s1a-waste-test-mem" + usage_date = self.dh.yesterday.date() + _dec = DecimalField(max_digits=33, decimal_places=15) + one = Value(Decimal("1"), output_field=_dec) + + with tenant_context(self.tenant): + OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id).delete() + + self.baker.make( + OCPUsageLineItemDailySummary, + cluster_id=cluster_id, + data_source="Pod", + namespace="s1a-test", + usage_start=usage_date, + usage_end=usage_date, + wasted_memory_cost=Decimal("30"), + ) + self.baker.make( + OCPUsageLineItemDailySummary, + cluster_id=cluster_id, + data_source="Pod", + namespace="s1a-test", + usage_start=usage_date, + usage_end=usage_date, + wasted_memory_cost=Decimal("20"), + ) + + mapper = OCPProviderMap(provider=Provider.PROVIDER_OCP, report_type="memory", 
schema_name=self.schema_name) + aggregates = mapper.report_type_map["aggregates"] + qs = OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id, data_source="Pod").annotate( + exchange_rate=one, + infra_exchange_rate=one, + ) + result = qs.aggregate(wasted_cost=aggregates["wasted_cost"]) + self.assertEqual(result["wasted_cost"], Decimal("50")) + + def test_wasted_cost_null_column_contributes_zero(self): + """Rows without a pre-computed wasted_cost (NULL) are treated as zero.""" + cluster_id = "s1a-waste-test-null" + usage_date = self.dh.yesterday.date() + _dec = DecimalField(max_digits=33, decimal_places=15) + one = Value(Decimal("1"), output_field=_dec) + + with tenant_context(self.tenant): + OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id).delete() + + self.baker.make( + OCPUsageLineItemDailySummary, + cluster_id=cluster_id, + data_source="Pod", + namespace="s1a-test", + usage_start=usage_date, + usage_end=usage_date, + wasted_cpu_cost=None, + ) + + mapper = OCPProviderMap(provider=Provider.PROVIDER_OCP, report_type="cpu", schema_name=self.schema_name) + aggregates = mapper.report_type_map["aggregates"] + qs = OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id, data_source="Pod").annotate( + exchange_rate=one, + infra_exchange_rate=one, + ) + result = qs.aggregate(wasted_cost=aggregates["wasted_cost"]) + self.assertEqual(result["wasted_cost"], Decimal("0")) + + def test_wasted_cost_order_by_desc_matches_annotation(self): + """Grouped wasted_cost annotation is sortable (used by order_by[wasted_cost]).""" + cluster_id = "s1a-waste-order" + usage_date = self.dh.yesterday.date() + _dec = DecimalField(max_digits=33, decimal_places=15) + one = Value(Decimal("1"), output_field=_dec) + + with tenant_context(self.tenant): + OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id).delete() + + self.baker.make( + OCPUsageLineItemDailySummary, + cluster_id=cluster_id, + data_source="Pod", + namespace="low-waste", + 
usage_start=usage_date, + usage_end=usage_date, + wasted_cpu_cost=Decimal("10"), + ) + self.baker.make( + OCPUsageLineItemDailySummary, + cluster_id=cluster_id, + data_source="Pod", + namespace="high-waste", + usage_start=usage_date, + usage_end=usage_date, + wasted_cpu_cost=Decimal("90"), + ) + + mapper = OCPProviderMap(provider=Provider.PROVIDER_OCP, report_type="cpu", schema_name=self.schema_name) + ann = mapper.report_type_map["annotations"] + rows = list( + OCPUsageLineItemDailySummary.objects.filter(cluster_id=cluster_id, data_source="Pod") + .annotate(exchange_rate=one, infra_exchange_rate=one) + .values("namespace") + .annotate(**{k: ann[k] for k in ("wasted_cost",) if k in ann}) + .order_by("-wasted_cost") + ) + self.assertEqual(len(rows), 2) + self.assertEqual(rows[0]["namespace"], "high-waste") + self.assertEqual(rows[1]["namespace"], "low-waste") diff --git a/koku/api/report/test/ocp/test_ocp_query_handler.py b/koku/api/report/test/ocp/test_ocp_query_handler.py index 6fa7ec475b..0ca2db366b 100644 --- a/koku/api/report/test/ocp/test_ocp_query_handler.py +++ b/koku/api/report/test/ocp/test_ocp_query_handler.py @@ -2214,14 +2214,20 @@ def test_efficiency_score_memory_report(self): self.assertIn("value", total_score["wasted_cost"]) self.assertIn("units", total_score["wasted_cost"]) - def test_efficiency_score_multi_group_by_returns_empty(self): - """Test that multi group-by returns empty total_score.""" + def test_efficiency_score_multi_group_by_omits_efficiency_percent(self): + """Test that multi group-by omits usage_efficiency_percent but still returns wasted_cost. + + usage_efficiency_percent is a ratio-of-sums that is only meaningful for a single + group-by dimension. wasted_cost is always a SUM of values and is returned regardless. 
+ """ url = "?group_by[cluster]=*&group_by[project]=*" query_params = self.mocked_query_params(url, OCPCpuView) handler = OCPReportQueryHandler(query_params) query_output = handler.execute_query() total = query_output.get("total") - self.assertEqual(total.get("total_score"), {}) + total_score = total.get("total_score") + self.assertNotIn("usage_efficiency_percent", total_score) + self.assertIn("wasted_cost", total_score) def test_efficiency_score_cost_report_excluded(self): """Test that cost report does not include total_score.""" diff --git a/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_node_p.sql b/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_node_p.sql index cb500a74ef..1ecd228f7e 100644 --- a/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_node_p.sql +++ b/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_node_p.sql @@ -4,6 +4,58 @@ WHERE usage_start >= {{start_date}}::date AND source_uuid = {{source_uuid}} ; +WITH line_items AS ( + SELECT + cluster_id, + cluster_alias, + node, + resource_id, + usage_start, + cost_model_rate_type, + infrastructure_raw_cost, + infrastructure_markup_cost, + cost_model_cpu_cost, + cost_model_memory_cost, + cost_model_volume_cost, + cost_model_gpu_cost, + pod_usage_cpu_core_hours, + pod_request_cpu_core_hours, + pod_effective_usage_cpu_core_hours, + pod_limit_cpu_core_hours, + pod_usage_memory_gigabyte_hours, + pod_request_memory_gigabyte_hours, + pod_effective_usage_memory_gigabyte_hours, + pod_limit_memory_gigabyte_hours, + cluster_capacity_cpu_core_hours, + cluster_capacity_memory_gigabyte_hours, + node_capacity_cpu_cores, + node_capacity_cpu_core_hours, + node_capacity_memory_gigabytes, + node_capacity_memory_gigabyte_hours, + cost_category_id, + raw_currency, + distributed_cost, + data_source, + -- Window: sums from base rows (rate_type IS NULL) at node+cluster+date level. 
+ -- Enables cross-rate-type waste computation in the outer SELECT. + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_usage_cpu_core_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, node, usage_start) AS grp_cpu_usage, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_effective_usage_cpu_core_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, node, usage_start) AS grp_cpu_effective, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_usage_memory_gigabyte_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, node, usage_start) AS grp_mem_usage, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_effective_usage_memory_gigabyte_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, node, usage_start) AS grp_mem_effective + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary + WHERE usage_start >= {{start_date}}::date + AND usage_start <= {{end_date}}::date + AND source_uuid = {{source_uuid}} + AND data_source = 'Pod' + AND namespace IS DISTINCT FROM 'Worker unallocated' + AND namespace IS DISTINCT FROM 'Platform unallocated' + AND namespace IS DISTINCT FROM 'Network unattributed' + and namespace IS DISTINCT FROM 'Storage unattributed' +) INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_by_node_p ( id, cluster_id, @@ -38,7 +90,9 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_by_node_p ( source_uuid, cost_category_id, raw_currency, - distributed_cost + distributed_cost, + wasted_cpu_cost, + wasted_memory_cost ) SELECT uuid_generate_v4() as id, cluster_id, @@ -73,15 +127,23 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_by_node_p ( {{source_uuid}}::uuid as source_uuid, max(cost_category_id) as cost_category_id, max(raw_currency) as raw_currency, - sum(distributed_cost) as distributed_cost - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary - WHERE usage_start >= {{start_date}}::date - AND usage_start <= {{end_date}}::date - AND source_uuid 
= {{source_uuid}} - AND data_source = 'Pod' - AND namespace IS DISTINCT FROM 'Worker unallocated' - AND namespace IS DISTINCT FROM 'Platform unallocated' - AND namespace IS DISTINCT FROM 'Network unattributed' - and namespace IS DISTINCT FROM 'Storage unattributed' + sum(distributed_cost) as distributed_cost, + CASE + WHEN cost_model_rate_type IN ('Infrastructure', 'Supplementary') + THEN GREATEST(0, + SUM(COALESCE(cost_model_cpu_cost, 0)) + * (1 - MAX(grp_cpu_usage) / NULLIF(MAX(grp_cpu_effective), 0)) + ) + ELSE 0 + END AS wasted_cpu_cost, + CASE + WHEN cost_model_rate_type IN ('Infrastructure', 'Supplementary') + THEN GREATEST(0, + SUM(COALESCE(cost_model_memory_cost, 0)) + * (1 - MAX(grp_mem_usage) / NULLIF(MAX(grp_mem_effective), 0)) + ) + ELSE 0 + END AS wasted_memory_cost + FROM line_items GROUP BY usage_start, cluster_id, cluster_alias, node, cost_model_rate_type ; diff --git a/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_project_p.sql b/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_project_p.sql index 682ea1fc29..460b2fc770 100644 --- a/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_project_p.sql +++ b/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_by_project_p.sql @@ -4,6 +4,54 @@ WHERE usage_start >= {{start_date}}::date AND source_uuid = {{source_uuid}} ; +WITH line_items AS ( + SELECT + cluster_id, + cluster_alias, + namespace, + resource_id, + usage_start, + cost_model_rate_type, + infrastructure_raw_cost, + infrastructure_markup_cost, + cost_model_cpu_cost, + cost_model_memory_cost, + cost_model_volume_cost, + cost_model_gpu_cost, + pod_usage_cpu_core_hours, + pod_request_cpu_core_hours, + pod_effective_usage_cpu_core_hours, + pod_limit_cpu_core_hours, + pod_usage_memory_gigabyte_hours, + pod_request_memory_gigabyte_hours, + pod_effective_usage_memory_gigabyte_hours, + pod_limit_memory_gigabyte_hours, + cluster_capacity_cpu_core_hours, + 
cluster_capacity_memory_gigabyte_hours, + cost_category_id, + raw_currency, + distributed_cost, + data_source, + -- Window: sums from base rows (rate_type IS NULL) at namespace+cluster+date level. + -- Enables cross-rate-type waste computation in the outer SELECT. + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_usage_cpu_core_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, namespace, usage_start) AS grp_cpu_usage, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_effective_usage_cpu_core_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, namespace, usage_start) AS grp_cpu_effective, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_usage_memory_gigabyte_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, namespace, usage_start) AS grp_mem_usage, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_effective_usage_memory_gigabyte_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, namespace, usage_start) AS grp_mem_effective + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary + WHERE usage_start >= {{start_date}}::date + AND usage_start <= {{end_date}}::date + AND source_uuid = {{source_uuid}} + AND data_source = 'Pod' + AND namespace IS DISTINCT FROM 'Worker unallocated' + AND namespace IS DISTINCT FROM 'Platform unallocated' + AND namespace IS DISTINCT FROM 'Network unattributed' + AND namespace IS DISTINCT FROM 'Storage unattributed' +) INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_by_project_p ( id, cluster_id, @@ -34,7 +82,9 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_by_project_p ( source_uuid, cost_category_id, raw_currency, - distributed_cost + distributed_cost, + wasted_cpu_cost, + wasted_memory_cost ) SELECT uuid_generate_v4() as id, cluster_id, @@ -65,15 +115,23 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_by_project_p ( {{source_uuid}}::uuid as source_uuid, max(cost_category_id) as cost_category_id, max(raw_currency) as 
raw_currency, - sum(distributed_cost) as distributed_cost - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary - WHERE usage_start >= {{start_date}}::date - AND usage_start <= {{end_date}}::date - AND source_uuid = {{source_uuid}} - AND data_source = 'Pod' - AND namespace IS DISTINCT FROM 'Worker unallocated' - AND namespace IS DISTINCT FROM 'Platform unallocated' - AND namespace IS DISTINCT FROM 'Network unattributed' - AND namespace IS DISTINCT FROM 'Storage unattributed' + sum(distributed_cost) as distributed_cost, + CASE + WHEN cost_model_rate_type IN ('Infrastructure', 'Supplementary') + THEN GREATEST(0, + SUM(COALESCE(cost_model_cpu_cost, 0)) + * (1 - MAX(grp_cpu_usage) / NULLIF(MAX(grp_cpu_effective), 0)) + ) + ELSE 0 + END AS wasted_cpu_cost, + CASE + WHEN cost_model_rate_type IN ('Infrastructure', 'Supplementary') + THEN GREATEST(0, + SUM(COALESCE(cost_model_memory_cost, 0)) + * (1 - MAX(grp_mem_usage) / NULLIF(MAX(grp_mem_effective), 0)) + ) + ELSE 0 + END AS wasted_memory_cost + FROM line_items GROUP BY usage_start, cluster_id, cluster_alias, namespace, cost_model_rate_type ; diff --git a/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_p.sql b/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_p.sql index 09128298c8..aaadbd9ffe 100644 --- a/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_p.sql +++ b/koku/masu/database/sql/openshift/ui_summary/reporting_ocp_pod_summary_p.sql @@ -4,6 +4,53 @@ WHERE usage_start >= {{start_date}}::date AND source_uuid = {{source_uuid}} ; +WITH line_items AS ( + SELECT + cluster_id, + cluster_alias, + resource_id, + usage_start, + cost_model_rate_type, + infrastructure_raw_cost, + infrastructure_markup_cost, + cost_model_cpu_cost, + cost_model_memory_cost, + cost_model_volume_cost, + cost_model_gpu_cost, + pod_usage_cpu_core_hours, + pod_request_cpu_core_hours, + pod_effective_usage_cpu_core_hours, + pod_limit_cpu_core_hours, + 
pod_usage_memory_gigabyte_hours, + pod_request_memory_gigabyte_hours, + pod_effective_usage_memory_gigabyte_hours, + pod_limit_memory_gigabyte_hours, + cluster_capacity_cpu_core_hours, + cluster_capacity_memory_gigabyte_hours, + cost_category_id, + raw_currency, + distributed_cost, + data_source, + -- Window: sums from base rows (rate_type IS NULL) at cluster+date level. + -- Enables cross-rate-type waste computation in the outer SELECT. + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_usage_cpu_core_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, usage_start) AS grp_cpu_usage, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_effective_usage_cpu_core_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, usage_start) AS grp_cpu_effective, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_usage_memory_gigabyte_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, usage_start) AS grp_mem_usage, + SUM(CASE WHEN cost_model_rate_type IS NULL THEN COALESCE(pod_effective_usage_memory_gigabyte_hours, 0) ELSE 0 END) + OVER (PARTITION BY cluster_id, usage_start) AS grp_mem_effective + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary + WHERE usage_start >= {{start_date}}::date + AND usage_start <= {{end_date}}::date + AND source_uuid = {{source_uuid}} + AND data_source = 'Pod' + AND namespace IS DISTINCT FROM 'Worker unallocated' + AND namespace IS DISTINCT FROM 'Platform unallocated' + AND namespace IS DISTINCT FROM 'Network unattributed' + AND namespace IS DISTINCT FROM 'Storage unattributed' +) INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_p ( id, cluster_id, @@ -33,7 +80,9 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_p ( source_uuid, cost_category_id, raw_currency, - distributed_cost + distributed_cost, + wasted_cpu_cost, + wasted_memory_cost ) SELECT uuid_generate_v4() as id, cluster_id, @@ -63,15 +112,23 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocp_pod_summary_p ( 
{{source_uuid}}::uuid as source_uuid, max(cost_category_id) as cost_category_id, max(raw_currency) as raw_currency, - sum(distributed_cost) as distributed_cost - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary - WHERE usage_start >= {{start_date}}::date - AND usage_start <= {{end_date}}::date - AND source_uuid = {{source_uuid}} - AND data_source = 'Pod' - AND namespace IS DISTINCT FROM 'Worker unallocated' - AND namespace IS DISTINCT FROM 'Platform unallocated' - AND namespace IS DISTINCT FROM 'Network unattributed' - AND namespace IS DISTINCT FROM 'Storage unattributed' + sum(distributed_cost) as distributed_cost, + CASE + WHEN cost_model_rate_type IN ('Infrastructure', 'Supplementary') + THEN GREATEST(0, + SUM(COALESCE(cost_model_cpu_cost, 0)) + * (1 - MAX(grp_cpu_usage) / NULLIF(MAX(grp_cpu_effective), 0)) + ) + ELSE 0 + END AS wasted_cpu_cost, + CASE + WHEN cost_model_rate_type IN ('Infrastructure', 'Supplementary') + THEN GREATEST(0, + SUM(COALESCE(cost_model_memory_cost, 0)) + * (1 - MAX(grp_mem_usage) / NULLIF(MAX(grp_mem_effective), 0)) + ) + ELSE 0 + END AS wasted_memory_cost + FROM line_items GROUP BY usage_start, cluster_id, cluster_alias, cost_model_rate_type ; diff --git a/koku/reporting/migrations/0351_ocppodsummarybynodep_wasted_cpu_cost_and_more.py b/koku/reporting/migrations/0351_ocppodsummarybynodep_wasted_cpu_cost_and_more.py new file mode 100644 index 0000000000..8a0913b5da --- /dev/null +++ b/koku/reporting/migrations/0351_ocppodsummarybynodep_wasted_cpu_cost_and_more.py @@ -0,0 +1,53 @@ +# Generated by Django 5.2.12 on 2026-05-14 17:15 +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ("reporting", "0350_widen_ratestousage_label_hash"), + ] + + operations = [ + migrations.AddField( + model_name="ocppodsummarybynodep", + name="wasted_cpu_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + migrations.AddField( + 
model_name="ocppodsummarybynodep", + name="wasted_memory_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + migrations.AddField( + model_name="ocppodsummarybyprojectp", + name="wasted_cpu_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + migrations.AddField( + model_name="ocppodsummarybyprojectp", + name="wasted_memory_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + migrations.AddField( + model_name="ocppodsummaryp", + name="wasted_cpu_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + migrations.AddField( + model_name="ocppodsummaryp", + name="wasted_memory_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + migrations.AddField( + model_name="ocpusagelineitemdailysummary", + name="wasted_cpu_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + migrations.AddField( + model_name="ocpusagelineitemdailysummary", + name="wasted_memory_cost", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ), + ] diff --git a/koku/reporting/provider/ocp/models.py b/koku/reporting/provider/ocp/models.py index 3922abb227..9e58c8bf2b 100644 --- a/koku/reporting/provider/ocp/models.py +++ b/koku/reporting/provider/ocp/models.py @@ -204,6 +204,8 @@ class Meta: source_uuid = models.UUIDField(unique=False, null=True) raw_currency = models.TextField(null=True) distributed_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + wasted_cpu_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + wasted_memory_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) class OCPTagsValues(models.Model): @@ -740,6 +742,10 @@ class Meta: cost_model_gpu_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) cost_model_rate_type = models.TextField(null=True) + # Pre-computed per-line-item 
wasted cost (pipeline-time, correct grain) + wasted_cpu_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + wasted_memory_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + class OCPPodSummaryByProjectP(models.Model): """A summarized partitioned table specifically for UI API queries. @@ -800,6 +806,10 @@ class Meta: cost_model_gpu_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) cost_model_rate_type = models.TextField(null=True) + # Pre-computed per-line-item wasted cost (pipeline-time, correct grain) + wasted_cpu_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + wasted_memory_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + class OCPPodSummaryByNodeP(models.Model): """A summarized partitioned table specifically for UI API queries. @@ -864,6 +874,10 @@ class Meta: cost_model_gpu_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) cost_model_rate_type = models.TextField(null=True) + # Pre-computed per-line-item wasted cost (pipeline-time, correct grain) + wasted_cpu_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + wasted_memory_cost = models.DecimalField(max_digits=33, decimal_places=15, null=True) + class OCPVolumeSummaryP(models.Model): """A summarized partitioned table specifically for UI API queries.