-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrequest_code.sql
More file actions
202 lines (164 loc) · 6.71 KB
/
request_code.sql
File metadata and controls
202 lines (164 loc) · 6.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
—- Project 1
SELECT
format_date("%Y%m", parse_date("%Y%m%d", date)) as month,
SUM(totals.visits) AS visits,
SUM(totals.pageviews) AS pageviews,
SUM(totals.transactions) AS transactions,
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
WHERE _TABLE_SUFFIX BETWEEN '0101' AND '0331'
GROUP BY 1
ORDER BY 1;
-- Query 2
SELECT
trafficSource.source as source,
sum(totals.visits) as total_visits,
sum(totals.Bounces) as total_no_of_bounces,
(sum(totals.Bounces)/sum(totals.visits))* 100.00 as bounce_rate
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`
GROUP BY source
ORDER BY total_visits DESC;
-- Query 3
SELECT
'Month' AS time_type,
FORMAT_DATE('%Y%m', PARSE_DATE('%Y%m%d', date)) AS time,
trafficSource.source AS source,
SUM(product.productRevenue) / 1000 AS revenue
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`,
UNNEST(hits) AS hits,
UNNEST(hits.product) AS product
WHERE product.productRevenue IS NOT NULL
GROUP BY time, source
UNION ALL
SELECT
'Week' AS time_type,
FORMAT_DATE('%G%V', PARSE_DATE('%Y%m%d', date)) AS time,
trafficSource.source AS source,
SUM(product.productRevenue) / 1000 AS revenue
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`,
UNNEST(hits) AS hits,
UNNEST(hits.product) AS product
WHERE product.productRevenue IS NOT NULL
GROUP BY time, source
ORDER BY revenue DESC,time;
-- nên order by time_type, để month vs week đc xếp thành các cụm riêng biệt
--Query 4
with
purchaser_data as(
select
format_date("%Y%m",parse_date("%Y%m%d",date)) as month,
(sum(totals.pageviews)/count(distinct fullvisitorid)) as avg_pageviews_purchase,
from `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
,unnest(hits) hits
,unnest(product) product
where _table_suffix between '0601' and '0731'
and totals.transactions>=1
and product.productRevenue is not null
group by month
),
non_purchaser_data as(
select
format_date("%Y%m",parse_date("%Y%m%d",date)) as month,
sum(totals.pageviews)/count(distinct fullvisitorid) as avg_pageviews_non_purchase,
from `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
,unnest(hits) hits
,unnest(product) product
where _table_suffix between '0601' and '0731'
and totals.transactions is null
and product.productRevenue is null
group by month
)
select
pd.*,
avg_pageviews_non_purchase
from purchaser_data pd
full join non_purchaser_data using(month)
order by pd.month;
-- câu 4 này lưu ý là mình nên dùng full join/left join, bởi vì trong câu này, phạm vi chỉ từ tháng 6-7, nên chắc chắc sẽ có pur và nonpur của cả 2 tháng
-- mình inner join thì vô tình nó sẽ ra đúng. nhưng nếu đề bài là 1 khoảng thời gian dài hơn, 2-3 năm chẳng hạn, thì có tháng chỉ có nonpur mà k có pur
-- thì khi đó inner join nó sẽ làm mình bị mất data, thay vì hiện số của nonpur và pur thì nó để trống
-- Query 5
select
format_date("%Y%m",parse_date("%Y%m%d",date)) as month,
sum(totals.transactions)/count(distinct fullvisitorid) as Avg_total_transactions_per_user
from `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`
,unnest (hits) hits,
unnest(product) product
where totals.transactions>=1
and product.productRevenue is not null
group by month;
--Query 6
select
format_date("%Y%m",parse_date("%Y%m%d",date)) as month,
((sum(product.productRevenue)/sum(totals.visits))/power(10,6)) as avg_revenue_by_user_per_visit
from `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`
,unnest(hits) hits
,unnest(product) product
where product.productRevenue is not null
and totals.transactions>=1
group by month;
--Query 7
-- subquery:
select
product.v2productname as other_purchased_product,
sum(product.productQuantity) as quantity
from `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`,
unnest(hits) as hits,
unnest(hits.product) as product
where fullvisitorid in (select distinct fullvisitorid
from `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`,
unnest(hits) as hits,
unnest(hits.product) as product
where product.v2productname = "YouTube Men's Vintage Henley"
and product.productRevenue is not null
AND totals.transactions>=1)
and product.v2productname != "YouTube Men's Vintage Henley"
and product.productRevenue is not null
AND totals.transactions>=1
group by other_purchased_product
order by quantity desc;
-- CTE:
-- ở bảng buyer_list này, mình chỉ muốn tìm ra danh sách nhưng ng mua, thì nó người ngta sẽ mua nhiều lần chẳng hặn
-- khi mình select distinct fullVisitorId, nó sẽ 3 người, nhưng nếu mình select fullVisitorId
-- nó ra 6 dòng, tường ứng với 6 record trong bảng, rồi nó mang 6 dòng này, đi mapping tiếp với câu dưới, nên nó bị dup lên
with buyer_list as(
SELECT
distinct fullVisitorId
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`
, UNNEST(hits) AS hits
, UNNEST(hits.product) as product
WHERE product.v2ProductName = "YouTube Men's Vintage Henley"
AND totals.transactions>=1
AND product.productRevenue is not null
)
SELECT
product.v2ProductName AS other_purchased_products,
SUM(product.productQuantity) AS quantity
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_201707*`
, UNNEST(hits) AS hits
, UNNEST(hits.product) as product
JOIN buyer_list using(fullVisitorId)
WHERE product.v2ProductName != "YouTube Men's Vintage Henley"
and product.productRevenue is not null
AND totals.transactions>=1
GROUP BY other_purchased_products
ORDER BY quantity DESC;
--Query 8
with product_data as(
select
format_date('%Y%m', parse_date('%Y%m%d',date)) as month,
count(CASE WHEN eCommerceAction.action_type = '2' THEN product.v2ProductName END) as num_product_view,
count(CASE WHEN eCommerceAction.action_type = '3' THEN product.v2ProductName END) as num_add_to_cart,
count(CASE WHEN eCommerceAction.action_type = '6' and product.productRevenue is not null THEN product.v2ProductName END) as num_purchase
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
,UNNEST(hits) as hits
,UNNEST (hits.product) as product
where _table_suffix between '20170101' and '20170331'
and eCommerceAction.action_type in ('2','3','6')
group by month
order by month
)
select
*,
round(num_add_to_cart/num_product_view * 100, 2) as add_to_cart_rate,
round(num_purchase/num_product_view * 100, 2) as purchase_rate
from product_data;