Group by
Subqueries
Joins
Common table expressions
Aggregate functions
Window functions
/*
Pitchfork Album Reviews - Data Exploration
Melissa Webb
*/
-- Who were the most reviewed artists?
SELECT artist,
COUNT(artist) AS review_count
FROM artists
GROUP BY artist
ORDER BY review_count DESC,
artist
LIMIT 10;
-- Which artists scored the highest on average?
SELECT a.artist,
ROUND(AVG(r.score), 2) AS avg_score
FROM artists AS a
JOIN reviews AS r
USING (reviewid)
GROUP BY a.artist
ORDER BY avg_score DESC,
artist
LIMIT 10;
-- Who were the highest rated artists by year?
WITH scores_by_year AS(
SELECT year(r.pub_date) AS year,
a.artist,
ROUND(AVG(r.score), 2) AS avg_score,
MAX(AVG(r.score)) OVER (PARTITION BY year(r.pub_date)) AS highest_avg_score
FROM reviews AS r
JOIN artists AS a
USING (reviewid)
GROUP BY year,
a.artist
ORDER BY year DESC,
avg_score DESC,
a.artist
)
SELECT year,
artist,
avg_score
FROM scores_by_year
WHERE avg_score = highest_avg_score;
-- How many reviews were posted each year?
SELECT year(pub_date) AS year,
COUNT(*) AS total_reviews
FROM reviews
GROUP BY 1
ORDER BY 1;
-- On which day of the week were reviews posted the most?
SELECT DAYNAME(pub_date) AS 'day',
COUNT(*) AS total_reviews,
CONCAT(ROUND((COUNT(*) * 100.0) / (
SELECT COUNT(*)
FROM reviews)), '%') AS percentage_of_reviews
FROM reviews
GROUP BY 1
ORDER BY 2 DESC;
-- What genres were reviewed the most each year?
WITH genres_by_year AS(
SELECT year(r.pub_date) AS year,
g.genre,
COUNT(*) AS total_reviews,
MAX(COUNT(*)) OVER (PARTITION BY year(r.pub_date)) AS highest_total_reviews
FROM genres AS g
JOIN reviews AS r
USING (reviewid)
WHERE g.genre IS NOT NULL
GROUP BY year,
g.genre
ORDER BY year DESC,
total_reviews DESC
)
SELECT year,
genre,
total_reviews
FROM genres_by_year
WHERE total_reviews = highest_total_reviews;
-- What was the most reviewed genre for each label?
WITH genres_by_label AS(
SELECT l.label,
g.genre,
COUNT(*) AS total_reviews,
MAX(COUNT(*)) OVER (PARTITION BY l.label) AS highest_total_reviews
FROM labels AS l
JOIN genres AS g
USING (reviewid)
WHERE l.label IS NOT NULL
AND g.genre IS NOT NULL
GROUP BY label,
genre
ORDER BY label,
total_reviews DESC
)
SELECT label,
genre,
total_reviews
FROM genres_by_label
WHERE total_reviews = highest_total_reviews;