-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQL-Retail-Analysis-project.sql
More file actions
185 lines (149 loc) · 4.37 KB
/
Copy pathSQL-Retail-Analysis-project.sql
File metadata and controls
185 lines (149 loc) · 4.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
-- SQL Retail Sales Analysis -
-- List the tables that exist in the public schema.
select tablename
from pg_tables
where schemaname = 'public';

-- Preview ten rows of the sales table.
select *
from "SQL" as s
limit 10;

-- Count every row in the sales table.
select count(*)
from "SQL" as s;
-- Data Cleaning
-- Show every row in which at least one of the checked columns is null.
-- A row passes the check only when all eight columns are populated, so the
-- filter is written as the negation of "everything is present".
-- (quantiy is spelled the way this script references the column everywhere.)
select *
from "SQL" as s
where not (
    transactions_id is not null
    and sale_date is not null
    and sale_time is not null
    and gender is not null
    and category is not null
    and cogs is not null
    and total_sale is not null
    and quantiy is not null
);

-- Remove the rows matched by the check above (same eight columns).
delete from "SQL" as s
where not (
    transactions_id is not null
    and sale_date is not null
    and sale_time is not null
    and gender is not null
    and category is not null
    and cogs is not null
    and total_sale is not null
    and quantiy is not null
);
-- Data Exploration
-- Total number of sales rows.
select count(*) as total_sale
from "SQL" as s;

-- Number of rows that carry a customer_id; a customer with several
-- purchases is counted once per row.
select count(s.customer_id) as total_customers
from "SQL" as s;

-- Number of distinct customers.
select count(distinct s.customer_id) as total_customers
from "SQL" as s;

-- The distinct categories present in the data, one row per category.
select s.category
from "SQL" as s
group by s.category;
-- Data Analysis & Business Key Problems & Answers
-- My Analysis & Findings
-- Q.1 Retrieve all columns for sales made on '2022-11-05'.
select *
from "SQL" as sales
where sales.sale_date = '2022-11-05';
-- Q.2 Retrieve all transactions where the category is 'Clothing' and the
-- quantity sold is greater than or equal to 4 in the month of Nov-2022.
-- The month is matched with a half-open date range (first of November up to,
-- but not including, first of December) instead of formatting every date as
-- text and comparing strings; the matched rows are the same.
-- The ::date cast is kept from the original because the column's declared
-- type is not visible in this script.
select *
from "SQL" as s
where s.category = 'Clothing'
  and s.quantiy >= 4
  and s.sale_date::date >= date '2022-11-01'
  and s.sale_date::date < date '2022-12-01';
-- Q.3 Total sales (sum of total_sale) for each category.
select
    s.category,
    sum(s.total_sale) as total_sales
from "SQL" as s
group by s.category;
-- Q.4 Average age of customers who purchased items from the 'Beauty' category.
-- The filter restricts the result to 'Beauty' as the question asks; without
-- it the query returns one row for every category.
-- total_customers counts purchase rows with a customer_id, not distinct people.
select
    s.category,
    count(s.customer_id) as total_customers,
    round(avg(s.age), 2) as average_age
from "SQL" as s
where s.category = 'Beauty'
group by s.category;
-- Q.5 All transactions whose total_sale is greater than 1000.
select *
from "SQL" as s
where s.total_sale > 1000;
-- Q.6 Number of transactions (transactions_id) made by each gender in each category.
select
    s.category,
    s.gender,
    count(s.transactions_id) as total_transactions
from "SQL" as s
group by
    s.category,
    s.gender
order by s.category;
-- Q.7 Average sale for each month, and the best-selling month in each year.
with monthly_sales as (
    -- One row per (year, month): rounded average sale, total sale, and the
    -- month's rank within its year by (unrounded) average sale, highest first.
    select
        extract(year from sale_date::date)::int as year,
        extract(month from sale_date::date)::int as month,
        round(avg(total_sale), 2) as avg_sale,
        sum(total_sale) as total_sale,
        rank() over (
            partition by extract(year from sale_date::date)
            order by avg(total_sale) desc
        ) as month_rank
    from "SQL" as s
    group by
        extract(year from sale_date::date),
        extract(month from sale_date::date)
)
-- Keep only the top-ranked month(s) of each year; rank() keeps ties.
select
    m.year,
    m.month,
    m.avg_sale,
    m.total_sale,
    'Best Month' as status
from monthly_sales as m
where m.month_rank = 1
order by m.year desc;
-- Q.7 (variant) Best month per year by average sale, reporting the unrounded
-- average and no total. The statement is terminated with a semicolon so it
-- does not run into the statement that follows it in the script.
select
    best.year,
    best.month,
    best.avg_sale
from (
    -- One row per (year, month), ranked within the year by average sale.
    select
        extract(year from sale_date::date)::int as year,
        extract(month from sale_date::date)::int as month,
        avg(total_sale) as avg_sale,
        rank() over (
            partition by extract(year from sale_date::date)
            order by avg(total_sale) desc
        ) as month_rank
    from "SQL" as s
    group by
        extract(year from sale_date::date),
        extract(month from sale_date::date)
) as best
where best.month_rank = 1;
-- Q.8 Top 5 customers by highest total sales.
select
    s.customer_id,
    sum(s.total_sale) as sales
from "SQL" as s
group by s.customer_id
order by sales desc
limit 5;
-- Q.9 Number of unique customers who purchased items from each category,
-- listed from the fewest customers to the most.
select
    s.category,
    count(distinct s.customer_id) as number_of_customers
from "SQL" as s
group by s.category
order by number_of_customers;
-- Q.10 Assign each sale to a shift and count the orders per shift.
-- Boundaries as applied by the query: hour before 12 is 'Morning',
-- hour 12 through 17 is 'Afternoon', anything else is 'Evening'.
with sale_hours as (
    -- Hour of day of each sale, taken once so the CASE below can reuse it.
    select extract(hour from sale_time::time)::int as sale_hour
    from "SQL" as s
),
shifted_sales as (
    select
        case
            when sale_hour < 12 then 'Morning'
            when sale_hour >= 12 and sale_hour <= 17 then 'Afternoon'
            else 'Evening'
        end as shift
    from sale_hours
)
select
    shift,
    count(*) as total_orders
from shifted_sales
group by shift;
-----------------------------END OF PROJECT-----------------------