# Import basic data manipulation and plotting packages
from math import *
import numpy as np
import statsmodels as sm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt


# Load the TV-show dataset from disk and print a summary of its structure
# (row count, column names, non-null counts and dtypes).
csv_path = r"C:/Users/laryl/Desktop/Data Sets/tv_shows.csv"
tv = pd.read_csv(csv_path)
# DataFrame.info() writes the summary itself and returns None, so the
# print() below also emits a trailing "None" line.
summary = tv.info()
print(summary)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5611 entries, 0 to 5610
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       5611 non-null   int64  
 1   Title            5611 non-null   object 
 2   Year             5611 non-null   int64  
 3   Age              3165 non-null   object 
 4   IMDb             4450 non-null   float64
 5   Rotten Tomatoes  1011 non-null   object 
 6   Netflix          5611 non-null   int64  
 7   Hulu             5611 non-null   int64  
 8   Prime Video      5611 non-null   int64  
 9   Disney+          5611 non-null   int64  
 10  type             5611 non-null   int64  
dtypes: float64(1), int64(7), object(3)
memory usage: 416.5+ KB
None


# Drop the percent sign from the Rotten Tomatoes scores so they can be
# parsed as numbers
rt_scores = tv["Rotten Tomatoes"].str.strip("%")
tv["Rotten Tomatoes"] = rt_scores

# Preview the first rows to confirm the symbol is gone
print(tv["Rotten Tomatoes"].head(5))

# Convert the cleaned strings from object to float, then confirm the dtype
tv["Rotten Tomatoes"] = tv["Rotten Tomatoes"].astype(float)
print(tv.dtypes)

0    96
1    93
2    91
3    78
4    97
Name: Rotten Tomatoes, dtype: object
Unnamed: 0           int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes    float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
type                 int64
dtype: object


# Visualize Age Groups by IMDb Score
plt.style.use("dark_background")
# set_palette() only changes the global default colour cycle and returns
# None, so the palette object handed to barplot() has to come from
# color_palette(). The set_palette() call is kept for its global effect.
sns.set_palette("Blues")
blues = sns.color_palette("Blues")
sns.barplot(
    x="Age",
    y="IMDb",
    data=tv,
    ci=None,  # no error bars; only the per-group mean is drawn
    order=["all", "18+", "16+", "13+", "7+"],
    palette=blues,
)
sns.set_context("notebook")
plt.show()

# Visualize Age Groups by Rotten Tomatoes Score
# set_palette() only changes the global default colour cycle and returns
# None, so the palette object handed to barplot() has to come from
# color_palette(). The set_palette() call is kept for its global effect.
sns.set_palette("Reds")
reds = sns.color_palette("Reds")
sns.barplot(
    x="Age",
    y="Rotten Tomatoes",
    data=tv,
    ci=None,  # no error bars; only the per-group mean is drawn
    order=["all", "18+", "16+", "13+", "7+"],
    palette=reds,
)
plt.show()


# Visualize how many shows fall into each age group
plt.style.use("dark_background")
# set_palette() only changes the global default colour cycle and returns
# None, so the palette object handed to countplot() has to come from
# color_palette(). The set_palette() call is kept for its global effect.
sns.set_palette("Greens")
greens = sns.color_palette("Greens")
plots = sns.countplot(
    x="Age",
    data=tv,
    order=["all", "18+", "16+", "13+", "7+"],
    palette=greens,
)
# Label each bar with its count, centred horizontally and nudged 4 points
# above the bar top. Counts are whole numbers, so no decimals are shown.
for bar in plots.patches:
    plots.annotate(
        format(bar.get_height(), ".0f"),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha="center",
        va="center",
        size=9,
        xytext=(0, 4),
        textcoords="offset points",
    )
plt.show()


# Swarm plot of the individual IMDb scores within each age group
# (one small marker per show, age group on the x axis)
sns.swarmplot(data=tv, x="Age", y="IMDb", size=3)

<AxesSubplot:xlabel='Age', ylabel='IMDb'>


# Swarm plot of the individual Rotten Tomatoes scores within each age group
# (drawn horizontally: score on the x axis, age group on the y axis)
sns.swarmplot(data=tv, x="Rotten Tomatoes", y="Age", size=3)

<AxesSubplot:xlabel='Rotten Tomatoes', ylabel='Age'>

Do Children Have Bad Taste?

Motivation

Guiding Question

Data Preparation

Exploratory Data Visualization

Conclusion

Sources