Data exploration of over 18,000 music reviews using SQL

Skills used:


/*
	Pitchfork Album Reviews - Data Exploration
	Melissa Webb
*/

-- Who were the most reviewed artists?

SELECT artist,
       COUNT(artist) AS review_count
FROM artists
GROUP BY artist
ORDER BY review_count DESC, 
			   artist
LIMIT 10;

-- Which artists scored the highest on average?

SELECT a.artist,
       ROUND(AVG(r.score), 2) AS avg_score
FROM artists AS a
JOIN reviews AS r
	USING (reviewid)
GROUP BY a.artist
ORDER BY avg_score DESC, 
				 artist
LIMIT 10;

-- Who were the highest rated artists by year?

WITH scores_by_year AS(
	SELECT year(r.pub_date) AS year,
			   a.artist,
			   ROUND(AVG(r.score), 2) AS avg_score,
         MAX(AVG(r.score)) OVER (PARTITION BY year(r.pub_date)) AS highest_avg_score
	FROM reviews AS r
	JOIN artists AS a
		USING (reviewid)
	GROUP BY year, 
					 a.artist
	ORDER BY year DESC, 
					 avg_score DESC, 
           a.artist
)
SELECT year, 
	     artist,
       avg_score 
FROM scores_by_year
WHERE avg_score = highest_avg_score;
       

-- How many reviews were posted each year?

SELECT year(pub_date) AS year,
	     COUNT(*) AS total_reviews
FROM reviews
GROUP BY 1
ORDER BY 1;
  

-- On which day of the week were reviews posted the most?

SELECT DAYNAME(pub_date) AS 'day',
       COUNT(*) AS total_reviews,
       CONCAT(ROUND((COUNT(*) * 100.0) / (
                SELECT COUNT(*) 
                FROM reviews)), '%') AS percentage_of_reviews
FROM reviews
GROUP BY 1
ORDER BY 2 DESC;

-- What genres were reviewed the most each year?

WITH genres_by_year AS(
	SELECT year(r.pub_date) AS year,
		     g.genre,
		     COUNT(*) AS total_reviews,
		     MAX(COUNT(*)) OVER (PARTITION BY year(r.pub_date)) AS highest_total_reviews
	FROM genres AS g
	JOIN reviews AS r
		USING (reviewid)
	WHERE g.genre IS NOT NULL
	GROUP BY year,
				   g.genre
	ORDER BY year DESC,
					 total_reviews DESC
)
SELECT year,
		   genre,
       total_reviews
FROM genres_by_year
WHERE total_reviews = highest_total_reviews;

-- What was the most reviewed genre for each label?

WITH genres_by_label AS(
	SELECT l.label,
		     g.genre,
		     COUNT(*) AS total_reviews,
		     MAX(COUNT(*)) OVER (PARTITION BY l.label) AS highest_total_reviews
	FROM labels AS l
	JOIN genres AS g
		USING (reviewid)
	WHERE l.label IS NOT NULL 
		AND g.genre IS NOT NULL
	GROUP BY label, 
			 genre
	 ORDER BY label,
			  total_reviews DESC
)
SELECT label,
	     genre,
       total_reviews
FROM genres_by_label
WHERE total_reviews = highest_total_reviews;
Powered by Fruition