Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 132 additions & 56 deletions koku/api/report/ocp/provider_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from django.db.models.functions import Cast
from django.db.models.functions import Coalesce
from django.db.models.functions import Concat
from django.db.models.functions import Greatest
from django.db.models.functions import JSONObject
from django.db.models.functions import Round
from django.db.models.functions.comparison import NullIf
Expand Down Expand Up @@ -194,45 +193,6 @@ def __cost_model_distributed_cost(self, cost_model_rate_type, exchange_rate_colu
* Coalesce(exchange_rate_column, Value(1, output_field=DecimalField())),
)

def _cpu_usage_sum(self):
    """Build a fresh aggregate that totals CPU usage core-hours.

    Each call constructs new expression objects. NULL values of
    ``pod_usage_cpu_core_hours`` are replaced by zero before summing.
    """
    zero = Value(0, output_field=DecimalField())
    cpu_usage = Coalesce(F("pod_usage_cpu_core_hours"), zero)
    return Sum(cpu_usage)

def _cpu_request_sum(self):
    """Build a fresh aggregate that totals CPU request core-hours.

    Each call constructs new expression objects. NULL values of
    ``pod_request_cpu_core_hours`` are replaced by zero before summing.
    """
    zero = Value(0, output_field=DecimalField())
    cpu_request = Coalesce(F("pod_request_cpu_core_hours"), zero)
    return Sum(cpu_request)

def _memory_usage_sum(self):
    """Build a fresh aggregate that totals memory usage gigabyte-hours.

    Each call constructs new expression objects. NULL values of
    ``pod_usage_memory_gigabyte_hours`` are replaced by zero before summing.
    """
    zero = Value(0, output_field=DecimalField())
    memory_usage = Coalesce(F("pod_usage_memory_gigabyte_hours"), zero)
    return Sum(memory_usage)

def _memory_request_sum(self):
    """Build a fresh aggregate that totals memory request gigabyte-hours.

    Each call constructs new expression objects. NULL values of
    ``pod_request_memory_gigabyte_hours`` are replaced by zero before summing.
    """
    zero = Value(0, output_field=DecimalField())
    memory_request = Coalesce(F("pod_request_memory_gigabyte_hours"), zero)
    return Sum(memory_request)

def _efficiency_annotations(self, usage_sum_prop, request_sum_prop, cost_total_expr):
    """Return the ``usage_efficiency`` and ``wasted_cost`` annotation expressions.

    Args:
        usage_sum_prop: expression for the summed usage.
        request_sum_prop: expression for the summed request.
        cost_total_expr: expression for the total cost the waste is taken from.

    Returns:
        A dict with two entries:

        * ``usage_efficiency`` -- ``usage / request * 100`` passed through
          ``Round``, coalesced to ``0`` and typed as an integer.
        * ``wasted_cost`` -- ``cost * (1 - usage / request)`` passed through
          ``Greatest`` with zero, then coalesced to zero, typed as a
          33-digit / 15-decimal-place decimal.
    """
    decimal_field = DecimalField(max_digits=33, decimal_places=15)

    def decimal_zero():
        # A new Value on every call, mirroring the separate literals used at each site.
        return Value(0, output_field=decimal_field)

    def usage_ratio():
        # NullIf maps a zero request total to NULL, so the ratio never divides by zero.
        return usage_sum_prop / NullIf(request_sum_prop, decimal_zero())

    efficiency_percent = Round(usage_ratio() * Value(100))
    unused_share = Value(1, output_field=decimal_field) - usage_ratio()
    wasted = Greatest(cost_total_expr * unused_share, decimal_zero())

    return {
        "usage_efficiency": Coalesce(
            efficiency_percent,
            Value(0),
            output_field=IntegerField(),
        ),
        "wasted_cost": Coalesce(
            wasted,
            decimal_zero(),
            output_field=decimal_field,
        ),
    }

def __init__(self, provider, report_type, schema_name):
"""Constructor."""
self._schema_name = schema_name
Expand Down Expand Up @@ -441,10 +401,35 @@ def __init__(self, provider, report_type, schema_name):
"pod_request_cpu_core_hours", default=Value(0, output_field=DecimalField())
),
"limit": Sum("pod_limit_cpu_core_hours", default=Value(0, output_field=DecimalField())),
**self._efficiency_annotations(
self._cpu_usage_sum(),
self._cpu_request_sum(),
self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_cpu_cost,
"usage_efficiency": Coalesce(
Round(
Sum(Coalesce(F("pod_usage_cpu_core_hours"), Value(0, output_field=DecimalField())))
/ NullIf(
Sum(
Coalesce(
F("pod_request_cpu_core_hours"), Value(0, output_field=DecimalField())
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Value(100)
),
Value(0),
output_field=IntegerField(),
),
"wasted_cost": Coalesce(
Sum(
Coalesce(
F("wasted_cpu_cost"),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Coalesce(
F("exchange_rate"),
Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
output_field=DecimalField(max_digits=33, decimal_places=15),
),
},
"capacity_aggregate": {
Expand Down Expand Up @@ -501,10 +486,38 @@ def __init__(self, provider, report_type, schema_name):
"limit": Sum(
Coalesce(F("pod_limit_cpu_core_hours"), Value(0, output_field=DecimalField()))
),
**self._efficiency_annotations(
self._cpu_usage_sum(),
self._cpu_request_sum(),
self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_cpu_cost,
"usage_efficiency": Coalesce(
Round(
Sum(Coalesce(F("pod_usage_cpu_core_hours"), Value(0, output_field=DecimalField())))
/ NullIf(
Sum(
Coalesce(
F("pod_request_cpu_core_hours"), Value(0, output_field=DecimalField())
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Value(100)
),
Value(0),
output_field=IntegerField(),
),
"wasted_cost": Coalesce(
Sum(
Coalesce(
F("wasted_cpu_cost"),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Coalesce(
F("exchange_rate"),
Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
output_field=DecimalField(max_digits=33, decimal_places=15),
),
"wasted_cost_units": Coalesce(
"currency_annotation", Value("USD", output_field=CharField())
),
"capacity": Max("cluster_capacity_cpu_core_hours"), # overwritten in capacity aggregation
"clusters": ArrayAgg(
Expand Down Expand Up @@ -598,10 +611,40 @@ def __init__(self, provider, report_type, schema_name):
"limit": Sum(
"pod_limit_memory_gigabyte_hours", default=Value(0, output_field=DecimalField())
),
**self._efficiency_annotations(
self._memory_usage_sum(),
self._memory_request_sum(),
self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_memory_cost,
"usage_efficiency": Coalesce(
Round(
Sum(
Coalesce(
F("pod_usage_memory_gigabyte_hours"), Value(0, output_field=DecimalField())
)
)
/ NullIf(
Sum(
Coalesce(
F("pod_request_memory_gigabyte_hours"),
Value(0, output_field=DecimalField()),
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Value(100)
),
Value(0),
output_field=IntegerField(),
),
"wasted_cost": Coalesce(
Sum(
Coalesce(
F("wasted_memory_cost"),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Coalesce(
F("exchange_rate"),
Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
output_field=DecimalField(max_digits=33, decimal_places=15),
),
},
"capacity_aggregate": {
Expand Down Expand Up @@ -659,10 +702,43 @@ def __init__(self, provider, report_type, schema_name):
"limit": Sum(
Coalesce(F("pod_limit_memory_gigabyte_hours"), Value(0, output_field=DecimalField()))
),
**self._efficiency_annotations(
self._memory_usage_sum(),
self._memory_request_sum(),
self.cloud_infrastructure_cost + self.markup_cost + self.cost_model_memory_cost,
"usage_efficiency": Coalesce(
Round(
Sum(
Coalesce(
F("pod_usage_memory_gigabyte_hours"), Value(0, output_field=DecimalField())
)
)
/ NullIf(
Sum(
Coalesce(
F("pod_request_memory_gigabyte_hours"),
Value(0, output_field=DecimalField()),
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Value(100)
),
Value(0),
output_field=IntegerField(),
),
"wasted_cost": Coalesce(
Sum(
Coalesce(
F("wasted_memory_cost"),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
* Coalesce(
F("exchange_rate"),
Value(1, output_field=DecimalField(max_digits=33, decimal_places=15)),
)
),
Value(0, output_field=DecimalField(max_digits=33, decimal_places=15)),
output_field=DecimalField(max_digits=33, decimal_places=15),
),
"wasted_cost_units": Coalesce(
"currency_annotation", Value("USD", output_field=CharField())
),
"capacity": Max(
"cluster_capacity_memory_gigabyte_hours"
Expand Down
50 changes: 27 additions & 23 deletions koku/api/report/ocp/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ def __init__(self, parameters):
ocp_pack_definitions["compute"] = {"keys": ["compute"], "units": "mig_compute_units"}
ocp_pack_definitions["memory"] = {"keys": ["memory"], "units": "mig_memory_units"}
ocp_pack_definitions["gpu_count"] = {"keys": ["gpu_count"], "units": "gpu_count_units"}
ocp_pack_definitions["score_efficiency"] = {
"keys": {"usage_efficiency": {"key": "usage_efficiency_percent", "group": "score"}},
"units": None,
}
ocp_pack_definitions["score_wasted"] = {
"keys": {"wasted_cost": {"key": "wasted_cost", "group": "score"}},
"units": "wasted_cost_units",
}

# super() needs to be called after _mapper and _limit is set
super().__init__(parameters)
Expand Down Expand Up @@ -229,30 +237,14 @@ def _format_query_response(self):

output["data"] = self.query_data
self.query_sum = self._pack_data_object(self.query_sum, **self._mapper.PACK_DEFINITIONS)
if "score" in self.query_sum:
self.query_sum["total_score"] = self.query_sum.pop("score")
self.query_sum["total_score"] = self.query_sum.pop("score", {})
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

total_score must only be injected for compute (cpu/memory) reports. Cost and volume reports have no efficiency score, so this key should be absent entirely.

Suggested change
self.query_sum["total_score"] = self.query_sum.pop("score", {})
if self._report_type in ("cpu", "memory"):
self.query_sum["total_score"] = self.query_sum.pop("score", {})
else:
self.query_sum.pop("score", None)

output["total"] = self.query_sum

if self._delta:
output["delta"] = self.query_delta

return output

def _pack_score(self, row, should_compute):
    """Move the efficiency annotations of ``row`` into a nested ``score`` entry.

    ``row`` is modified in place and nothing is returned.

    Args:
        row: mapping that may carry ``usage_efficiency`` and ``wasted_cost``.
        should_compute: when true, the two values are moved under
            ``row["score"]``; when false, they are discarded and
            ``row["score"]`` is set to an empty dict.
    """
    if not should_compute:
        # Drop the raw annotations so they never leak into the response.
        row.pop("usage_efficiency", None)
        row.pop("wasted_cost", None)
        row["score"] = {}
        return

    # Missing annotations fall back to zero values.
    efficiency = row.pop("usage_efficiency", 0)
    wasted_value = row.pop("wasted_cost", Decimal(0))
    row["score"] = {
        "usage_efficiency_percent": efficiency,
        "wasted_cost": {"value": wasted_value, "units": self.currency},
    }

def execute_query(self): # noqa: C901
"""Execute query and return provided data.

Expand All @@ -273,9 +265,8 @@ def execute_query(self): # noqa: C901
query_group_by = ["date"] + group_by_value
query_order_by = ["-date", self.order]

query_data = query.values(*query_group_by).annotate(
**{k: v for k, v in self.report_annotations.items() if k not in group_by_value}
)
row_annotations = {k: v for k, v in self.report_annotations.items() if k not in group_by_value}
query_data = query.values(*query_group_by).annotate(**row_annotations)

if is_grouped_by_project(self.parameters):
query_data = self._project_classification_annotation(query_data)
Expand Down Expand Up @@ -303,16 +294,23 @@ def execute_query(self): # noqa: C901
if self._report_type in ("cpu", "memory"):
has_tag_interaction = self._tag_group_by or self.get_tag_filter_keys()
should_compute = not has_tag_interaction and len(group_by_value) <= 1
self._pack_score(query_sum, should_compute)
if should_compute:
query_sum["wasted_cost_units"] = self.currency
else:
query_sum.pop("usage_efficiency", None)
query_sum.pop("wasted_cost", None)
query_sum.pop("wasted_cost_units", None)
Comment on lines +299 to +302
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wasted_cost is a simple aggregate SUM and is meaningful even when multiple group-by dimensions are used. Only usage_efficiency_percent (a ratio-of-sums) should be suppressed for multi-group-by. Keep wasted_cost and its units so the packing step can include it in total_score.

Suggested change
else:
query_sum.pop("usage_efficiency", None)
query_sum.pop("wasted_cost", None)
query_sum.pop("wasted_cost_units", None)
else:
query_sum.pop("usage_efficiency", None)
query_sum["wasted_cost_units"] = self.currency


if self._delta:
query_data = self.add_deltas(query_data, query_sum)

query_data = self.order_by(query_data, query_order_by)

if self._report_type in ("cpu", "memory"):
if self._report_type in ("cpu", "memory") and not should_compute:
for row in query_data:
self._pack_score(row, should_compute)
row.pop("usage_efficiency", None)
row.pop("wasted_cost", None)
row.pop("wasted_cost_units", None)

for row in query_data:
if tag_iterable := row.get("tags"):
Expand All @@ -324,6 +322,12 @@ def execute_query(self): # noqa: C901
data = [{"date": date_string, "vm_names": query_data}]
else:
data = list(query_data)
if self._report_type in ("cpu", "memory"):
# Pack score fields so the CSV renderer emits score.usage_efficiency_percent
# and score.wasted_cost.* columns to match the JSON response structure.
score_pack = {k: v for k, v in self._mapper.PACK_DEFINITIONS.items() if k.startswith("score_")}
for row in data:
self._pack_data_object(row, **score_pack)
else:
# Pass in a copy of the group by without the added
# tag column name prefix
Expand Down
Loading
Loading