# Data visualisations made by Janhavi Pimplikar,
# a student at Pimpri Chinchwad College of Engineering.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as py
import cufflinks as cf
# Put plotly into offline notebook mode so figures render inline.
# NOTE(review): connected=True is understood to load plotly.js from a CDN
# instead of embedding it in the notebook - confirm against the installed
# plotly version.
py.offline.init_notebook_mode(connected=True)
# Switch cufflinks to offline mode as well; the DataFrame/Series .iplot()
# calls further down go through cufflinks.
cf.go_offline()
from plotly.offline import iplot
# Load the student records; every plot below reads from this DataFrame.
student = pd.read_csv('Studentrecord.csv')

# Number of students per gender.
gender = student['gender'].value_counts()

# Offset every wedge except the first away from the centre.  The tuple is
# sized from the data so it always matches the number of wedges (matplotlib
# rejects an explode sequence of the wrong length).  Previously `explode`
# was defined but never handed to the pie call, so it had no effect.
explode = tuple(0.0 if i == 0 else 0.2 for i in range(len(gender)))

gender.plot.pie(
    figsize=(7, 7),
    explode=explode,
    legend=True,
    autopct='%1.1f%%',
    fontsize=15,
    shadow=True,
    colors=sns.color_palette('Spectral'),
)

# A white disc drawn over the centre turns the pie into a donut chart.
c = plt.Circle((0, 0), 0.3, color='white')
plt.gca().add_artist(c)
# Bar chart: number of students in each race/ethnicity group.

# The seaborn theme only affects axes created after it is set, so choose it
# before building the figure.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 5))

# Pass the column as x=...; recent seaborn releases accept only `data` as a
# positional argument, so a positional column name collides with data=.
graph = sns.countplot(x='race/ethnicity', data=student, ax=ax)

# Resize the x tick labels without re-setting the label text (re-setting
# labels on an unfixed locator triggers a matplotlib warning).
graph.tick_params(axis='x', labelsize=12)

plt.xlabel('Race/Ethnicity of students', size=15)
plt.ylabel('Number of students', size=15)
plt.title('Race analysis of students', size=20)
# Interactive bar chart: number of students per parental education level.
edunumber = student['parental level of education'].value_counts()

# The labels name the column that is actually counted (the parents'
# education level); the previous title described it as the students' own.
edunumber.iplot(
    kind='bar',
    legend=True,
    color='lightgreen',
    xTitle='Parental level of education',
    yTitle='Number of students',
    title='Parental education level of students',
)
# Bar chart: number of students per lunch status.

# A matplotlib style sheet only applies to figures created afterwards, so
# select it before the figure is built.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(10, 5))

# Name the column via x= and pass the frame via data=; a positional Series
# is interpreted as `data` by recent seaborn releases.  The axes created
# above is passed explicitly instead of relying on the "current axes".
sns.countplot(x='lunch', data=student, palette='inferno', ax=ax)

plt.xlabel('Lunch status', size=15)
plt.ylabel('Number of students', size=15)
plt.xticks(size=15)
plt.yticks(size=15)
# Frequency histograms of the math, reading and writing scores.
# The three plots differ only in column, colour and x-axis label, so they
# are produced by one loop instead of three copy-pasted blocks.

# Set the theme before any figure is created so it applies to all three.
sns.set_style('darkgrid')

for column, colour, label in (
    ('math score', 'orange', 'Maths scores'),
    ('reading score', 'blue', 'Reading scores'),
    ('writing score', 'green', 'Writing scores'),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    # histplot is the replacement for the deprecated distplot; with
    # kde=False it draws a plain count histogram.
    g = sns.histplot(student[column], color=colour, kde=False, ax=ax)
    plt.xlabel(label, size=15)
    plt.xticks(size=15)
    plt.yticks(size=15)
    g.set_title(
        'Frequency distribution of {}s of students'.format(column),
        size=15,
    )
# --- Female students: parental level of education (two-ring donut) ---

# Keep only the rows whose gender column equals 'female'.
fem = student.loc[student['gender'] == 'female']
fem  # notebook-style display; a bare expression does nothing in a script

# Counts feeding the two rings.
gender = fem['gender'].value_counts()
parental_level_of_education = fem['parental level of education'].value_counts()

# Ring thickness and colours (colour arrays are sampled from Pastel1).
size = 0.3
cmap = plt.get_cmap('Pastel1')
outer_shades = cmap(np.arange(2))
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)

# Outer ring: gender counts of the filtered frame.
gender.plot.pie(
    ax=ax,
    radius=1,
    startangle=60,
    fontsize=15,
    colors=outer_shades,
    wedgeprops={'width': size},
)

# Inner ring: share of each parental education level, with percentages.
parental_level_of_education.plot.pie(
    ax=ax,
    radius=1 - size,
    startangle=60,
    fontsize=10,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
)

plt.ylabel('Parental level of education (females)', size=15)
plt.show()
# --- Female students: race/ethnicity donut ---
race = fem['race/ethnicity'].value_counts()

size = 0.3
cmap = plt.get_cmap('Dark2')
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
race.plot.pie(
    ax=ax,
    radius=1 - size,
    startangle=60,
    fontsize=15,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
    shadow=True,
    legend=True,
)
plt.legend(loc='best', title='Racial groups')
plt.ylabel('Racial groups (females)', size=15)
plt.show()

# --- Female students: interactive box plot of the three subject scores ---
femscores = fem[['math score', 'reading score', 'writing score']]
femscores  # notebook-style display; no effect when run as a script
femscores.iplot(
    kind='box',
    legend=True,
    xTitle='Subjects',
    yTitle='Score',
    title='Score analysis of female students',
    colors={
        'math score': 'fuchsia',
        'reading score': 'purple',
        'writing score': 'yellow',
    },
)
# --- Male students: parental level of education (two-ring donut) ---

# Keep only the rows whose gender column equals 'male'.
male = student.loc[student['gender'] == 'male']
male  # notebook-style display; a bare expression does nothing in a script

# Counts feeding the two rings.
gender1 = male['gender'].value_counts()
parental_level_of_education = male['parental level of education'].value_counts()

# Ring thickness and colours (colour arrays are sampled from Pastel2).
size = 0.3
cmap = plt.get_cmap('Pastel2')
outer_shades = cmap(np.arange(2))
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)

# Outer ring: gender counts of the filtered frame.
plots = gender1.plot.pie(
    ax=ax,
    radius=1,
    startangle=60,
    fontsize=15,
    colors=outer_shades,
    wedgeprops={'width': size},
)

# Inner ring: share of each parental education level, with percentages.
parental_level_of_education.plot.pie(
    ax=ax,
    radius=1 - size,
    startangle=60,
    fontsize=10,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
)

plt.ylabel('Parental level of education (males)', size=15)
plt.show()
# --- Male students: race/ethnicity donut ---
race1 = male['race/ethnicity'].value_counts()

size = 0.3
cmap = plt.get_cmap('Set1')
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
race1.plot.pie(
    ax=ax,
    radius=1 - size,
    startangle=60,
    fontsize=15,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
    shadow=True,
    legend=True,
)
plt.legend(loc='best', title='Racial groups')
plt.ylabel('Racial groups (males)', size=15)
plt.show()

# --- Male students: interactive box plot of the three subject scores ---
malescores = male[['math score', 'reading score', 'writing score']]
malescores  # notebook-style display; no effect when run as a script
malescores.iplot(
    kind='box',
    legend=True,
    xTitle='Subjects',
    yTitle='Score',
    title='Score analysis of male students',
    colors={
        'math score': 'red',
        'reading score': 'navy',
        'writing score': 'brown',
    },
)
# Per-subject score plotted against race/ethnicity, coloured by gender.
# The three figures differ only in score column, palette and label text, so
# they are generated by one loop instead of three copy-pasted blocks.

# Set the theme once, before any figure exists.  Previously set_style was
# called after each catplot, so the first figure was drawn with whatever
# theme happened to be active from earlier cells.
sns.set_style('whitegrid')

for column, palette, label in (
    ('math score', 'CMRmap', 'Math score'),
    ('reading score', 'gnuplot2', 'Reading score'),
    ('writing score', 'gist_rainbow', 'Writing score'),
):
    sns.catplot(
        x='race/ethnicity',
        y=column,
        hue='gender',
        data=student,
        height=6,
        aspect=2,
        palette=palette,
    )
    # right=False keeps the right-hand spine visible.
    sns.despine(right=False)
    plt.xticks(size=15, rotation=45)
    plt.xlabel('Race/Ethnicity', size=15, color='red')
    plt.ylabel(label, size=15, color='red')
    plt.title(label + ' analysis by race', size=20)
# Total score of the first 300 students, split by parental education level
# and gender, followed by a per-subject box plot for the same 300 students.

# Set the theme before the figure is created so it applies to this plot.
sns.set_style('darkgrid')

# Take an explicit copy of the subset: adding a column to a slice of
# `student` triggers pandas' SettingWithCopyWarning and leaves it ambiguous
# whether the write lands on a view or a copy.
mathedu = student[
    ['gender', 'parental level of education',
     'math score', 'reading score', 'writing score']
].head(300).copy()

# Add the combined score as a new column on the copy.
mathedu['total score'] = (
    mathedu['math score'] + mathedu['reading score'] + mathedu['writing score']
)

# NOTE: catplot returns a seaborn FacetGrid, not a matplotlib Axes; the
# name `ax` is kept only so existing references keep working.
ax = sns.catplot(
    x='parental level of education',
    y='total score',
    hue='gender',
    data=mathedu,
    kind='swarm',
    height=6,
    aspect=2,
    legend=True,
    palette='CMRmap',
)
plt.xlabel('Parental level of education', size=15)
plt.xticks(size=15)
plt.yticks(size=15)
plt.ylabel('Total score', size=15)
plt.title('Score analysis of first 300 students with respect to educational background and gender', size=15)
# Works with catplot and also gives the legend better visibility.
plt.legend(fontsize='xx-large', title_fontsize='40')

# Keep only the three subject columns for the interactive box plot.
mathedu = mathedu[['math score', 'reading score', 'writing score']]
mathedu.iplot(
    kind='box',
    xTitle='Subjects',
    yTitle='Scores',
    title='Scores analysis of each subject (300 students)',
)
# Dataset imported from https://www.kaggle.com/spscientist/students-performance-in-exams?select=StudentsPerformance.csv
# Attributions: websites such as stackoverflow.com, medium.com, geeksforgeeks.com, etc.