Students' test performance analysis

Data visualisations made by Janhavi Pimplikar

A student at Pimpri Chinchwad College of Engineering

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
import plotly as py
import cufflinks as cf

# Initialise plotly's offline notebook mode so figures can be rendered inline.
py.offline.init_notebook_mode(connected=True)

# Put cufflinks into offline mode; presumably required by the .iplot() calls used later in this notebook.
cf.go_offline()

from plotly.offline import iplot

Univariate analysis of student record

# Load the student records. The columns referenced later in this notebook
# ('gender', 'race/ethnicity', 'lunch', the three score columns, ...) come from this CSV.
student=pd.read_csv('Studentrecord.csv')

# Bare expression: shows the DataFrame when run as a notebook cell.
student

# Number of students of each gender.
gender=student['gender'].value_counts()

# Bare expression: shows the counts when run as a notebook cell.
gender

female    518
male      482
Name: gender, dtype: int64

# Donut chart of the gender split across all students.
# One offset per wedge: the second wedge is pulled out slightly from the centre.
explode = (0.0, 0.2)
gender.plot.pie(
    figsize=(7, 7),
    explode=explode,  # was defined but never handed to the plot
    legend=True,
    autopct='%1.1f%%',
    fontsize=15,
    shadow=True,
    colors=sns.color_palette('Spectral'),
)
# A white disc over the centre turns the pie into a donut.
c = plt.Circle((0, 0), 0.3, color='white')
plt.gca().add_artist(c)

<matplotlib.patches.Circle at 0x24726a3b708>

# Bare expression: shows the race/ethnicity column when run as a notebook cell.
student['race/ethnicity']

0      group B
1      group C
2      group B
3      group A
4      group C
        ...   
995    group E
996    group C
997    group C
998    group D
999    group D
Name: race/ethnicity, Length: 1000, dtype: object

# Number of students in each race/ethnicity group.
# Apply the seaborn style first: it only affects axes created afterwards.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 5))
# Name the column with x=; recent seaborn releases reject it as a positional argument.
graph = sns.countplot(x='race/ethnicity', data=student, ax=ax)
plt.xticks(size=12)
plt.xlabel('Race/Ethnicity of students', size=15)
plt.ylabel('Number of students', size=15)
plt.title('Race analysis of students', size=20)

Text(0.5, 1.0, 'Race analysis of students')

# Number of students per parental education level.
edunumber=student['parental level of education'].value_counts()

# Bare expression: shows the counts when run as a notebook cell.
edunumber

some college          226
associate's degree    222
high school           196
some high school      179
bachelor's degree     118
master's degree        59
Name: parental level of education, dtype: int64

# Interactive bar chart of how many students fall under each parental education level.
# The titles name the parents' education, because that is the column being counted
# (the previous titles described it as the students' own education level).
edunumber.iplot(
    kind='bar',
    legend=True,
    color='lightgreen',
    xTitle='Parental level of education',
    yTitle='Number of students',
    title='Parental level of education of students',
)

# Number of students per lunch type.
# Select the matplotlib style before creating the figure; it is not applied retroactively.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(10, 5))
# Use explicit x=/data= keywords (a positional Series is not accepted as the x variable
# by recent seaborn releases) and draw on the axes created above.
sns.countplot(x='lunch', data=student, palette='inferno', ax=ax)
plt.xlabel('Lunch status', size=15)
plt.ylabel('Number of students', size=15)
plt.xticks(size=15)
plt.yticks(size=15)

(array([  0., 100., 200., 300., 400., 500., 600., 700.]),
 <a list of 8 Text yticklabel objects>)

# Histogram of math scores for all students.
# Apply the seaborn style first: it only affects axes created afterwards.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 6))
# histplot replaces the deprecated distplot (needs seaborn >= 0.11); kde=False keeps it a plain histogram.
g = sns.histplot(student['math score'], color='orange', kde=False, ax=ax)
plt.xlabel('Maths scores', size=15)
plt.xticks(size=15)
plt.yticks(size=15)
g.set_title('Frequency distribution of math scores of students', size=15)

Text(0.5, 1.0, 'Frequency distribution of math scores of students')

# Histogram of reading scores for all students.
# Apply the seaborn style first: it only affects axes created afterwards.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 6))
# histplot replaces the deprecated distplot (needs seaborn >= 0.11); kde=False keeps it a plain histogram.
g = sns.histplot(student['reading score'], color='blue', kde=False, ax=ax)
plt.xlabel('Reading scores', size=15)
plt.xticks(size=15)
plt.yticks(size=15)
g.set_title('Frequency distribution of reading scores of students', size=15)

Text(0.5, 1.0, 'Frequency distribution of reading scores of students')

# Histogram of writing scores for all students.
# Apply the seaborn style first: it only affects axes created afterwards.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 6))
# histplot replaces the deprecated distplot (needs seaborn >= 0.11); kde=False keeps it a plain histogram.
g = sns.histplot(student['writing score'], color='green', kde=False, ax=ax)
plt.xlabel('Writing scores', size=15)
plt.xticks(size=15)
plt.yticks(size=15)
g.set_title('Frequency distribution of writing scores of students', size=15)

Text(0.5, 1.0, 'Frequency distribution of writing scores of students')

Analysis of female students

# Records of the female students only.
fem = student.loc[student['gender'] == 'female']

fem

# Nested donut: gender as the outer ring, parental education as the inner ring.
# NOTE: this rebinds `gender`, which earlier held the counts for all students.
gender = fem['gender'].value_counts()
parental_level_of_education = fem['parental level of education'].value_counts()

size = 0.3  # ring width; the inner ring's radius is reduced by the same amount
cmap = plt.get_cmap('Pastel1')
outer_shades = cmap(np.arange(2))
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
gender.plot.pie(
    ax=ax,
    radius=1,
    colors=outer_shades,
    wedgeprops={'width': size},
    startangle=60,
    fontsize=15,
)
parental_level_of_education.plot.pie(
    ax=ax,
    radius=1 - size,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
    startangle=60,
    fontsize=10,
)
plt.ylabel('Parental level of education (females)', size=15)
plt.show()

# Ring chart of the race/ethnicity groups among female students.
race = fem['race/ethnicity'].value_counts()

size = 0.3  # ring width; the ring's radius is reduced by the same amount
cmap = plt.get_cmap('Dark2')
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
race.plot.pie(
    ax=ax,
    radius=1 - size,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
    startangle=60,
    fontsize=15,
    shadow=True,
    legend=True,
)
# Redraw the legend with a title.
plt.legend(loc='best', title='Racial groups')
plt.ylabel('Racial groups (females)', size=15)
plt.show()

# The three score columns for female students.
femscores = fem[[
    'math score',
    'reading score',
    'writing score',
]]

femscores

# Interactive box plot, one box per subject.
femscores.iplot(
    kind='box',
    legend=True,
    xTitle='Subjects',
    yTitle='Score',
    title='Score analysis of female students',
    colors={
        'math score': 'fuchsia',
        'reading score': 'purple',
        'writing score': 'yellow',
    },
)

Analysis of male students

# Records of the male students only.
male = student.loc[student['gender'] == 'male']

male

# Nested donut: gender as the outer ring, parental education as the inner ring.
gender1 = male['gender'].value_counts()
parental_level_of_education = male['parental level of education'].value_counts()

size = 0.3  # ring width; the inner ring's radius is reduced by the same amount
cmap = plt.get_cmap('Pastel2')
outer_shades = cmap(np.arange(2))
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
plots = gender1.plot.pie(
    ax=ax,
    radius=1,
    colors=outer_shades,
    wedgeprops={'width': size},
    startangle=60,
    fontsize=15,
)
parental_level_of_education.plot.pie(
    ax=ax,
    radius=1 - size,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
    startangle=60,
    fontsize=10,
)
plt.ylabel('Parental level of education (males)', size=15)
plt.show()

# Ring chart of the race/ethnicity groups among male students.
race1 = male['race/ethnicity'].value_counts()

size = 0.3  # ring width; the ring's radius is reduced by the same amount
cmap = plt.get_cmap('Set1')
inner_shades = cmap(np.arange(10))

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
race1.plot.pie(
    ax=ax,
    radius=1 - size,
    autopct='%1.2f%%',
    colors=inner_shades,
    wedgeprops={'width': size},
    startangle=60,
    fontsize=15,
    shadow=True,
    legend=True,
)
# Redraw the legend with a title.
plt.legend(loc='best', title='Racial groups')
plt.ylabel('Racial groups (males)', size=15)
plt.show()

# The three score columns for male students.
malescores = male[[
    'math score',
    'reading score',
    'writing score',
]]

malescores

# Interactive box plot, one box per subject.
malescores.iplot(
    kind='box',
    legend=True,
    xTitle='Subjects',
    yTitle='Score',
    title='Score analysis of male students',
    colors={
        'math score': 'red',
        'reading score': 'navy',
        'writing score': 'brown',
    },
)

Bivariate scores analysis of students (both genders)

# Math scores per race/ethnicity group, coloured by gender.
# Set the style before catplot builds its figure; styles are not applied retroactively.
sns.set_style('whitegrid')
sns.catplot(
    x='race/ethnicity',
    y='math score',
    hue='gender',
    data=student,
    height=6,
    aspect=2,
    palette='CMRmap',
)
# right=False leaves the right-hand spine in place while the top one is removed.
sns.despine(right=False)
plt.xticks(size=15, rotation=45)
plt.xlabel('Race/Ethnicity', size=15, color='red')
plt.ylabel('Math score', size=15, color='red')
plt.title('Math score analysis by race', size=20)

Text(0.5, 1, 'Math score analysis by race')

# Reading scores per race/ethnicity group, coloured by gender.
# Set the style before catplot builds its figure; styles are not applied retroactively.
sns.set_style('whitegrid')
sns.catplot(
    x='race/ethnicity',
    y='reading score',
    hue='gender',
    data=student,
    height=6,
    aspect=2,
    palette='gnuplot2',
)
# right=False leaves the right-hand spine in place while the top one is removed.
sns.despine(right=False)
plt.xticks(size=15, rotation=45)
plt.xlabel('Race/Ethnicity', size=15, color='red')
plt.ylabel('Reading score', size=15, color='red')
plt.title('Reading score analysis by race', size=20)

Text(0.5, 1, 'Reading score analysis by race')

# Writing scores per race/ethnicity group, coloured by gender.
# Set the style before catplot builds its figure; styles are not applied retroactively.
sns.set_style('whitegrid')
sns.catplot(
    x='race/ethnicity',
    y='writing score',
    hue='gender',
    data=student,
    height=6,
    aspect=2,
    palette='gist_rainbow',
)
# right=False leaves the right-hand spine in place while the top one is removed.
sns.despine(right=False)
plt.xticks(size=15, rotation=45)
plt.xlabel('Race/Ethnicity', size=15, color='red')
plt.ylabel('Writing score', size=15, color='red')
plt.title('Writing score analysis by race', size=20)

Text(0.5, 1, 'Writing score analysis by race')

# Working table for the first 300 students: gender, parental education and the three scores.
mathedu = student[['gender', 'parental level of education', 'math score', 'reading score', 'writing score']]

# Take an explicit copy so the column added below is written to an independent frame
# rather than to a slice of `student` (avoids pandas' SettingWithCopyWarning).
mathedu = mathedu.head(300).copy()

# Per-student sum of the three subject scores.
total = mathedu['math score'] + mathedu['reading score'] + mathedu['writing score']

# Add the sum as a new column of the subset.
mathedu['total score'] = total

mathedu

# Swarm plot of total score against parental education, coloured by gender (first 300 students).
# Set the style before catplot builds its figure; styles are not applied retroactively.
sns.set_style('darkgrid')
# Note: catplot returns a figure-level grid object, despite the variable being called `ax`.
ax = sns.catplot(
    x='parental level of education',
    y='total score',
    hue='gender',
    data=mathedu,
    kind='swarm',
    height=6,
    aspect=2,
    legend=True,
    palette='CMRmap',
)
plt.xlabel('Parental level of education', size=15)
plt.xticks(size=15)
plt.yticks(size=15)
plt.ylabel('Total score', size=15)
plt.title('Score analysis of first 300 students with respect to educational background and gender', size=15)
# Works with catplot and gives the legend better visibility.
# NOTE(review): catplot(legend=True) may already draw its own legend; confirm both are wanted.
plt.legend(fontsize='xx-large', title_fontsize='40')

<matplotlib.legend.Legend at 0x24728c44388>

# Narrow the 300-student table down to the three score columns.
mathedu = mathedu[[
    'math score',
    'reading score',
    'writing score',
]]

mathedu

# Interactive box plot, one box per subject.
mathedu.iplot(
    kind='box',
    xTitle='Subjects',
    yTitle='Scores',
    title='Scores analysis of each subject (300 students)',
)

Dataset imported from https://www.kaggle.com/spscientist/students-performance-in-exams?select=StudentsPerformance.csv

Attributions: websites such as stackoverflow.com, medium.com, geeksforgeeks.org, etc.

	math score	reading score	writing score
3	47	57	44
4	76	78	75
7	40	43	39
8	64	64	67
10	58	54	52
...	...	...	...
985	57	51	54
987	81	75	76
990	86	81	75
994	63	63	62
996	62	55	55

	math score	reading score	writing score
0	72	72	74
1	69	90	88
2	90	95	93
3	47	57	44
4	76	78	75
...	...	...	...
295	67	62	60
296	46	41	43
297	71	74	68
298	40	46	50
299	90	87	75

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score
0	female	group B	bachelor's degree	standard	none	72	72	74
1	female	group C	some college	standard	completed	69	90	88
2	female	group B	master's degree	standard	none	90	95	93
3	male	group A	associate's degree	free/reduced	none	47	57	44
4	male	group C	some college	standard	none	76	78	75
...	...	...	...	...	...	...	...	...
995	female	group E	master's degree	standard	completed	88	99	95
996	male	group C	high school	free/reduced	none	62	55	55
997	female	group C	high school	free/reduced	completed	59	71	65
998	female	group D	some college	standard	completed	68	78	77
999	female	group D	some college	free/reduced	none	77	86	86

	math score	reading score	writing score
0	72	72	74
1	69	90	88
2	90	95	93
5	71	83	78
6	88	95	92
...	...	...	...
993	62	72	74
995	88	99	95
997	59	71	65
998	68	78	77
999	77	86	86

	math score	reading score	writing score
3	47	57	44
4	76	78	75
7	40	43	39
8	64	64	67
10	58	54	52
...	...	...	...
985	57	51	54
987	81	75	76
990	86	81	75
994	63	63	62
996	62	55	55

	math score	reading score	writing score
0	72	72	74
1	69	90	88
2	90	95	93
3	47	57	44
4	76	78	75
...	...	...	...
295	67	62	60
296	46	41	43
297	71	74	68
298	40	46	50
299	90	87	75

	math score	reading score	writing score
0	72	72	74
1	69	90	88
2	90	95	93
5	71	83	78
6	88	95	92
...	...	...	...
993	62	72	74
995	88	99	95
997	59	71	65
998	68	78	77
999	77	86	86

	math score	reading score	writing score
3	47	57	44
4	76	78	75
7	40	43	39
8	64	64	67
10	58	54	52
...	...	...	...
985	57	51	54
987	81	75	76
990	86	81	75
994	63	63	62
996	62	55	55

	math score	reading score	writing score
0	72	72	74
1	69	90	88
2	90	95	93
3	47	57	44
4	76	78	75
...	...	...	...
295	67	62	60
296	46	41	43
297	71	74	68
298	40	46	50
299	90	87	75

	math score	reading score	writing score
0	72	72	74
1	69	90	88
2	90	95	93
5	71	83	78
6	88	95	92
...	...	...	...
993	62	72	74
995	88	99	95
997	59	71	65
998	68	78	77
999	77	86	86

	math score	reading score	writing score
3	47	57	44
4	76	78	75
7	40	43	39
8	64	64	67
10	58	54	52
...	...	...	...
985	57	51	54
987	81	75	76
990	86	81	75
994	63	63	62
996	62	55	55

	math score	reading score	writing score
0	72	72	74
1	69	90	88
2	90	95	93
3	47	57	44
4	76	78	75
...	...	...	...
295	67	62	60
296	46	41	43
297	71	74	68
298	40	46	50
299	90	87	75