-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasicdataanalysis.py
More file actions
115 lines (87 loc) · 3.45 KB
/
basicdataanalysis.py
File metadata and controls
115 lines (87 loc) · 3.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#Import the module openpyxl
import openpyxl
#Import the Excel sales data
import csv
#Read the data from the spreadsheet
def read_data():
data = []
with open("sales.csv", "r") as sales_csv:
spreadsheet = csv.DictReader(sales_csv)
for row in spreadsheet:
data.append(row)
return data
def run():
data = read_data()
sales = []
dictionary = {}
for row in data:
sale = int(row["sales"])
month = row["month"]
sales.append(sale)
dictionary[sale] = month
#Collect all the sales from each month into a single list
list_of_sales = list(sales)
print("List of sales: {}".format(list_of_sales))
print()
#Show the monthly changes as a percentage and fill NaN with a value
import pandas as pd
df = pd.read_csv(r"sales.csv")
df["percentage-change for sales"] = df["sales"].pct_change() * 100
df = df.fillna(0)
print(df)
output_df = pd.DataFrame()
#Create an excelwriter object
with pd.ExcelWriter(r"Book_Sales.xlsx") as writer:
df.to_excel(writer, sheet_name="Results Sheet", index=True)
output_df.to_excel(writer, sheet_name="Results Sheet", index=False)
#Load the existing workbook
from openpyxl import load_workbook
workbook_name = "Book_Sales.xlsx"
wb = load_workbook(workbook_name)
page = wb.active
#Set the column dimensions as best fit
from openpyxl.utils import get_column_letter
wb = openpyxl.load_workbook("Book_Sales.xlsx")
ws = wb["Results Sheet"]
for i in range(1, ws.max_column + 1):
ws.column_dimensions[get_column_letter(i)].bestFit = True
ws.column_dimensions[get_column_letter(i)].auto_size = True
#Save the changes
wb.save("Book_Sales.xlsx")
wb.close()
from openpyxl import load_workbook
wb2 = load_workbook("Book_Sales.xlsx")
wb2.create_sheet("Yearly Results")
wb2.save("Book_Sales.xlsx")
wb.close()
#Output the total sales across all months
total = sum(sales)
print("\n""The total value of sales is £{}".format(total))
#Find the maximum and minimum values of sales
max_value = max(sales)
min_value = min(sales)
#Find the months with the lowest and highest number of sales
max_month = dictionary.get(max_value)
min_month = dictionary.get(min_value)
#Find the average value of sales
number_of_sales = len(sales)
average_sales_value = total / number_of_sales
import math
number = average_sales_value
rounded_number = math.ceil(number)
#Print these values
print("The maximum value is £{}".format(max_value))
print("The minimum value is £{}".format(min_value))
print("The month with the highest number of sales is {}".format(max_month))
print("The month with the lowest number of sales is {}".format(min_month))
print("The average value is £{}".format(rounded_number))
import pandas as pd
#Create a dataframe for the new data
df = pd.DataFrame({"Sales Total": [total], "Sales Average": [rounded_number], "Sales Maximum": [max_value], "Sales Minimum": [min_value], "Month with Highest Sales": [max_month], "Month with Lowest Sales": [min_month]})
#Use ExcelWriter with append mode and replace option; index=False removes surplus numbering from the first column
with pd.ExcelWriter(r"Book_Sales.xlsx", mode="a", if_sheet_exists="replace") as writer:
df.to_excel(writer, sheet_name="Yearly Results", index=False)
#Save the workbook to keep the changes
wb.save("Book_Sales.xlsx")
wb.close()
run()