Also include common EDA/preprocessing done with libraries such as Seaborn, Plotly, etc.
# Count the missing values in each column of the `train` DataFrame.
train.isna().sum()
# Bar chart of how many reviews fall under each Sentiment value.
sentiment_counts = train.Sentiment.value_counts()
sns.barplot(x=sentiment_counts.index, y=sentiment_counts)
# Keep only the rows whose fare_amount is zero or greater.
df = df.loc[df["fare_amount"] >= 0]
df.describe()
# Horizontal bar chart of the `class` value counts, sorted in ascending order.
df["class"].value_counts().sort_values().plot.barh()
# All variables are categorical, so they need to be encoded
from sklearn.preprocessing import LabelEncoder
def encodeCategorical(data):
    """Replace every column of ``data`` with its label-encoded values.

    Each column is run through scikit-learn's ``LabelEncoder.fit_transform``
    and the result is written back into the same column, so the object
    passed in is modified in place.

    Args:
        data: Object exposing ``columns`` and column get/set by name
            (presumably a pandas DataFrame -- confirm with callers).

    Returns:
        The same ``data`` object, after its columns have been overwritten.
    """
    for column_name in data.columns:
        # fit_transform refits on every call, so a fresh encoder per column
        # yields the same codes as one shared encoder.
        data[column_name] = LabelEncoder().fit_transform(data[column_name])
    return data
# Label-encode every column of df, then preview the first rows.
df = encodeCategorical(df)
df.head()
# Select the 'name' column into y and the listed feature columns into X.
feature_columns = ['artist', 'Genre/Mood', 'Language', 'release_year', 'popularity']
y = df['name']
X = df[feature_columns]
# Ordinal-encode the Sex, Blood and Study columns, overwriting them in df.
# The import lives with the snippet so it does not rely on an import made
# somewhere else.
from sklearn.preprocessing import OrdinalEncoder

ordinal_columns = ["Sex", "Blood", "Study"]
enc = OrdinalEncoder()
# fit_transform learns the categories and encodes in a single call; `enc`
# stays fitted, so it can still be used to transform other data later.
df[ordinal_columns] = enc.fit_transform(df[ordinal_columns])
Smarter Ways to Encode Categorical Data for Machine Learning
Using OrdinalEncoder to transform categorical values in Python