def count_values(data, column, split_option=";"):
    """Count the checkbox options stored as delimited strings in a column.

    Parameters:
        data: DataFrame
            The original data
        column: string
            Column in data that will be parsed
        split_option: string default ";"
            Separator used to split each cell of the column into options

    Results:
        labels: set
            A set containing every distinct checkbox option
        label_count: dict
            A dict with the option as key and the number of rows that
            contain this option as value
    """
    labels = set()
    label_count = {}

    for cell in data[column]:
        # Missing answers carry no options; skipping them here also keeps
        # the counting below from ever seeing a stale or unbound option set.
        if pd.isna(cell):
            continue

        # A set, so an option repeated inside one cell counts once per row.
        options = set(cell.split(split_option))
        labels |= options

        for option in options:
            label_count[option] = label_count.get(option, 0) + 1

    return labels, label_count
def bar_plot_option(label_count, top=None, color_option=None, value_option=False):
    """Build the label and color arguments of a plot from the label counts.

    Parameters:
        label_count: dict
            The data parsed from the specific column. Key is label, value is
            the amount
        top: int default None
            Number of largest amounts to highlight. If it is None (or 0),
            only the labels are returned, as an index sorted by ascending
            amount
        color_option: list default None
            Colors assigned, in ascending amount order, to the highlighted
            labels. Caution: the last element is the default color used for
            every label that is not highlighted. Required when top is given
        value_option: boolean default False
            If True, append the percentage of the amount to the highlighted
            labels

    Result:
        labels: Index or list
            It is used to plot as the argument labels. Labels that are not
            highlighted are replaced by an empty string
        colors: list optional value
            It is used to plot as the argument colors. Only returned when
            top is given

    Raises:
        TypeError: top is given, label_count is not empty and color_option
            is None
    """
    series = pd.Series(label_count).sort_values()

    # Without a cut-off only the sorted labels are returned (a single value,
    # not a tuple); existing callers rely on this shape.
    if not top:
        return series.index

    labels = []
    colors = []

    # nlargest cannot handle the object dtype of an empty Series, and there
    # is nothing to label anyway.
    if series.empty:
        return labels, colors

    if color_option is None:
        raise TypeError(
            "color_option must be a sequence of colors when top is given"
        )

    # Loop invariants: computed once instead of once per label.
    top_values = set(series.nlargest(top).tolist())
    total = series.sum()
    default_index = len(color_option) - 1
    highlighted = 0

    for label, amount in series.items():
        if amount not in top_values:
            labels.append("")
            colors.append(color_option[-1])
            continue

        # Membership is value based, so ties can highlight more than `top`
        # labels; fall back to the default color instead of running past
        # the end of color_option.
        colors.append(color_option[min(highlighted, default_index)])
        highlighted += 1

        # update the label with the formatted value
        if value_option:
            labels.append(label + "\n{:0.2f}%".format(amount / total * 100))
        else:
            labels.append(label)

    return labels, colors