USA voters' (2012) analysis

Data visualisations made by Janhavi Pimplikar

A student at Pimpri Chinchwad College of Engineering

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly as py
from plotly.offline import iplot
import cufflinks as cf
In [2]:
import seaborn as sns

# Put plotly and cufflinks into offline mode so figures render inside the notebook.
py.offline.init_notebook_mode(connected=True)
cf.go_offline()
In [3]:
import plotly.express as px
In [ ]:
 

Total and citizen population in the US

In [4]:
# Load the voter records; the relative path is resolved against the working directory.
abc = pd.read_csv("USVoters.csv")
In [5]:
# Display the loaded frame (one row per state and age group).
abc
Out[5]:
State Abbr. Age Total Population Citizen Population Registered Voters Confirmed Voters
0 Alabama AL 18 to 24 439000 428000 212000 155000
1 Alabama AL 25 to 34 576000 535000 359000 271000
2 Alabama AL 35 to 44 615000 582000 410000 330000
3 Alabama AL 45 to 64 1297000 1275000 1051000 939000
4 Alabama AL 65+ 667000 660000 523000 459000
... ... ... ... ... ... ... ...
250 Wyoming WY 18 to 24 56000 55000 21000 18000
251 Wyoming WY 25 to 34 73000 71000 44000 39000
252 Wyoming WY 35 to 44 68000 66000 41000 36000
253 Wyoming WY 45 to 64 155000 154000 101000 95000
254 Wyoming WY 65+ 74000 73000 61000 59000

255 rows × 7 columns

In [6]:
# Collapse the age-group rows into one row per state abbreviation.
# numeric_only=True keeps the text columns ('State', 'Age') out of the sum;
# without it, pandas >= 2.0 concatenates their strings into the result.
ABC = abc.groupby('Abbr.').sum(numeric_only=True).reset_index()
In [7]:
# Display the per-state totals.
ABC
Out[7]:
Abbr. Total Population Citizen Population Registered Voters Confirmed Voters
0 AK 516000 495000 360000 289000
1 AL 3594000 3480000 2555000 2154000
2 AR 2198000 2110000 1376000 1124000
3 AZ 4863000 4315000 2811000 2412000
4 CA 28358000 23418000 15356000 13463000
5 CO 3817000 3543000 2635000 2495000
6 CT 2726000 2500000 1761000 1569000
7 DC 517000 461000 385000 351000
8 DE 693000 642000 469000 430000
9 FL 15033000 13326000 9102000 8107000
10 GA 7178000 6738000 4766000 4168000
11 HI 1012000 930000 547000 481000
12 IA 2319000 2232000 1745000 1548000
13 ID 1129000 1064000 744000 680000
14 IL 9650000 8831000 6424000 5428000
15 IN 4853000 4724000 3270000 2801000
16 KS 2120000 1974000 1467000 1249000
17 KY 3292000 3193000 2303000 1895000
18 LA 3320000 3239000 2498000 2149000
19 MA 5170000 4773000 3758000 3382000
20 MD 4449000 4006000 2888000 2610000
21 ME 1041000 1020000 787000 699000
22 MI 7496000 7228000 5621000 4831000
23 MN 4054000 3903000 3085000 2859000
24 MO 4520000 4409000 3383000 2818000
25 MS 2167000 2130000 1795000 1588000
26 MT 769000 754000 553000 495000
27 NC 7263000 6711000 5294000 4624000
28 ND 528000 515000 384000 328000
29 NE 1371000 1297000 901000 799000
30 NH 1029000 992000 752000 688000
31 NJ 6729000 5929000 4326000 3670000
32 NM 1554000 1426000 979000 878000
33 NV 2040000 1808000 1175000 1048000
34 NY 15066000 13081000 8886000 7676000
35 OH 8751000 8550000 6076000 5395000
36 OK 2809000 2734000 1805000 1431000
37 OR 2999000 2807000 2086000 1897000
38 PA 9848000 9451000 6794000 5824000
39 RI 818000 751000 552000 469000
40 SC 3516000 3381000 2479000 2187000
41 SD 616000 606000 454000 371000
42 TN 4849000 4678000 3211000 2606000
43 TX 18642000 16062000 10750000 8643000
44 UT 1916000 1793000 1137000 1022000
45 VA 6095000 5646000 4210000 3778000
46 VT 496000 487000 356000 307000
47 WA 5230000 4833000 3533000 3172000
48 WI 4351000 4247000 3318000 3128000
49 WV 1453000 1443000 983000 689000
50 WY 426000 419000 268000 247000
In [8]:
# Colour each state by the natural log of its total population.
log_total_population = np.log(ABC['Total Population'])
fig = px.choropleth(
    ABC,
    locations='Abbr.',
    locationmode='USA-states',
    color=log_total_population,
    hover_name='Abbr.',
    color_continuous_scale=px.colors.sequential.Inferno,
    scope='usa',
    range_color=(0, 20),
    title='US population by State',
)
fig.update(layout_coloraxis_showscale=True)
fig.show()
In [9]:
# Total population per age group, summed over every state.
AGE = abc['Total Population'].groupby(abc['Age']).sum()
In [10]:
# Display the per-age-group totals.
AGE
Out[10]:
Age
18 to 24    29881000
25 to 34    41145000
35 to 44    39624000
45 to 64    82085000
65+         42514000
Name: Total Population, dtype: int64
In [11]:
# Donut chart of the total US population split by age group.
fig, ax = plt.subplots(figsize=(10, 10))
# AGE is a Series, so no `y` column selector is needed (pandas ignores it here).
AGE.plot.pie(
    legend=True,
    shadow=True,
    colors=sns.color_palette('Set2'),
    ax=ax,
    autopct='%1.2f%%',
    fontsize=15,
)
# A white disc over the centre turns the pie into a donut.
ax.add_artist(plt.Circle((0, 0), 0.3, color='white'))
ax.legend(loc='upper right')
ax.set_ylabel('Total US population by age group', size=15)
# Use plt.show() rather than fig.show(): the inline backend is non-GUI, so
# Figure.show() only emits a UserWarning.
plt.show()
C:\Users\apimplikar\Desktop\Anaconda\lib\site-packages\ipykernel_launcher.py:7: UserWarning:

Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.

In [12]:
# Colour each state by the natural log of its citizen population.
log_citizen_population = np.log(ABC['Citizen Population'])
fig = px.choropleth(
    ABC,
    locations='Abbr.',
    locationmode='USA-states',
    color=log_citizen_population,
    hover_name='Abbr.',
    color_continuous_scale=px.colors.sequential.Viridis,
    scope='usa',
    range_color=(0, 20),
    title='US citizen population by State',
)
fig.update(layout_coloraxis_showscale=True)
fig.show()
In [13]:
# Donut chart of the US citizen population split by age group.
# The chart must be drawn from the citizen counts: AGE holds the
# 'Total Population' sums, and Series.plot.pie ignores a `y` argument, so
# plotting AGE here would repeat the total-population chart under a wrong label.
citizen_by_age = abc.groupby('Age')['Citizen Population'].sum()

fig, ax = plt.subplots(figsize=(10, 10))
citizen_by_age.plot.pie(
    legend=True,
    shadow=True,
    colors=sns.color_palette('Pastel1'),
    ax=ax,
    autopct='%1.2f%%',
    fontsize=15,
)
# A white disc over the centre turns the pie into a donut.
ax.add_artist(plt.Circle((0, 0), 0.3, color='white'))
ax.legend(loc='upper right')
ax.set_ylabel('US citizen population by age group', size=15)
# Use plt.show() rather than fig.show(): the inline backend is non-GUI, so
# Figure.show() only emits a UserWarning.
plt.show()
C:\Users\apimplikar\Desktop\Anaconda\lib\site-packages\ipykernel_launcher.py:7: UserWarning:

Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.

In [14]:
# Set the style BEFORE drawing: seaborn styles only apply to axes created
# afterwards, so setting it after the plot left the first run unstyled and
# made a re-run of this cell look different.
sns.set_style('darkgrid')

# Per-state total population against citizen population, with marginal distributions.
sns.jointplot(x='Total Population', y='Citizen Population', color='red', data=ABC, height=8)
plt.xticks(size=20)
plt.yticks(size=20);  # trailing ';' keeps the returned tick tuple out of the cell output
In [ ]:
 

Registered voters and confirmed voters in the US

In [15]:
# Bar chart: registered voters for every state abbreviation.
ABC.iplot(
    kind='bar',
    x='Abbr.',
    y='Registered Voters',
    color='purple',
    xTitle='States',
    yTitle='Registered Voters',
    title='Number of Registered voters per state',
)
In [16]:
# Marker-only scatter: citizen population against registered voters, one point per state.
ABC.iplot(
    x='Citizen Population',
    y='Registered Voters',
    mode='markers',
    color='lightgreen',
    xTitle='Citizen Population',
    yTitle='Registered Voters',
    title='Relational analysis between citizens and registered voters',
)
In [17]:
# Violin plot of the registered-voter counts in `abc`, one violin per age group.
sns.catplot(
    data=abc,
    x='Age',
    y='Registered Voters',
    kind='violin',
    palette='rainbow',
    height=8,
    aspect=2,
)
# Enlarge tick labels, axis labels and title for readability on the wide figure.
plt.xticks(size=20)
plt.yticks(size=20)
plt.xlabel('Age groups', size=20)
plt.ylabel('Number of Registered Voters', size=20)
plt.title('Frequency of Registered voters by age', size=20)
Out[17]:
Text(0.5, 1, 'Frequency of Registered voters by age')
In [18]:
# Bar chart: confirmed voters for every state abbreviation.
ABC.iplot(
    kind='bar',
    x='Abbr.',
    y='Confirmed Voters',
    color='fuchsia',
    xTitle='States',
    yTitle='Confirmed Voters',
    title='Number of Confirmed voters per state',
)
In [19]:
# Marker-only scatter: citizen population against confirmed voters, one point per state.
ABC.iplot(
    x='Citizen Population',
    y='Confirmed Voters',
    mode='markers',
    color='navy',
    xTitle='Citizen Population',
    yTitle='Confirmed Voters',
    title='Relational analysis between citizens and confirmed voters',
)
In [20]:
# Violin plot of the confirmed-voter counts in `abc`, one violin per age group.
sns.catplot(
    data=abc,
    x='Age',
    y='Confirmed Voters',
    kind='violin',
    palette='twilight',
    height=8,
    aspect=2,
)
# Enlarge tick labels, axis labels and title for readability on the wide figure.
plt.xticks(size=20)
plt.yticks(size=20)
plt.xlabel('Age groups', size=20)
plt.ylabel('Number of Confirmed Voters', size=20)
plt.title('Frequency of confirmed voters by age', size=20)
Out[20]:
Text(0.5, 1, 'Frequency of confirmed voters by age')
In [ ]:
 

Top 10 states with the highest voter turnout

In [21]:
# Voter turnout (%) = (confirmed voters / registered voters) * 100
In [22]:
# Turnout here is measured against registered voters (not the citizen population):
# confirmed voters as a percentage of registered voters, per state.
turnout_ratio = ABC['Confirmed Voters'].div(ABC['Registered Voters'])
ABC['Voter Turnout in %'] = turnout_ratio.mul(100)
In [23]:
# Display the per-state totals together with the new turnout column.
ABC
Out[23]:
Abbr. Total Population Citizen Population Registered Voters Confirmed Voters Voter Turnout in %
0 AK 516000 495000 360000 289000 80.277778
1 AL 3594000 3480000 2555000 2154000 84.305284
2 AR 2198000 2110000 1376000 1124000 81.686047
3 AZ 4863000 4315000 2811000 2412000 85.805763
4 CA 28358000 23418000 15356000 13463000 87.672571
5 CO 3817000 3543000 2635000 2495000 94.686907
6 CT 2726000 2500000 1761000 1569000 89.097104
7 DC 517000 461000 385000 351000 91.168831
8 DE 693000 642000 469000 430000 91.684435
9 FL 15033000 13326000 9102000 8107000 89.068337
10 GA 7178000 6738000 4766000 4168000 87.452791
11 HI 1012000 930000 547000 481000 87.934186
12 IA 2319000 2232000 1745000 1548000 88.710602
13 ID 1129000 1064000 744000 680000 91.397849
14 IL 9650000 8831000 6424000 5428000 84.495641
15 IN 4853000 4724000 3270000 2801000 85.657492
16 KS 2120000 1974000 1467000 1249000 85.139741
17 KY 3292000 3193000 2303000 1895000 82.283977
18 LA 3320000 3239000 2498000 2149000 86.028823
19 MA 5170000 4773000 3758000 3382000 89.994678
20 MD 4449000 4006000 2888000 2610000 90.373961
21 ME 1041000 1020000 787000 699000 88.818297
22 MI 7496000 7228000 5621000 4831000 85.945561
23 MN 4054000 3903000 3085000 2859000 92.674230
24 MO 4520000 4409000 3383000 2818000 83.298847
25 MS 2167000 2130000 1795000 1588000 88.467967
26 MT 769000 754000 553000 495000 89.511754
27 NC 7263000 6711000 5294000 4624000 87.344163
28 ND 528000 515000 384000 328000 85.416667
29 NE 1371000 1297000 901000 799000 88.679245
30 NH 1029000 992000 752000 688000 91.489362
31 NJ 6729000 5929000 4326000 3670000 84.835876
32 NM 1554000 1426000 979000 878000 89.683350
33 NV 2040000 1808000 1175000 1048000 89.191489
34 NY 15066000 13081000 8886000 7676000 86.383074
35 OH 8751000 8550000 6076000 5395000 88.791968
36 OK 2809000 2734000 1805000 1431000 79.279778
37 OR 2999000 2807000 2086000 1897000 90.939597
38 PA 9848000 9451000 6794000 5824000 85.722696
39 RI 818000 751000 552000 469000 84.963768
40 SC 3516000 3381000 2479000 2187000 88.221057
41 SD 616000 606000 454000 371000 81.718062
42 TN 4849000 4678000 3211000 2606000 81.158518
43 TX 18642000 16062000 10750000 8643000 80.400000
44 UT 1916000 1793000 1137000 1022000 89.885664
45 VA 6095000 5646000 4210000 3778000 89.738717
46 VT 496000 487000 356000 307000 86.235955
47 WA 5230000 4833000 3533000 3172000 89.782055
48 WI 4351000 4247000 3318000 3128000 94.273659
49 WV 1453000 1443000 983000 689000 70.091556
50 WY 426000 419000 268000 247000 92.164179
In [24]:
# Rank the states from highest to lowest turnout.
# sort_values(..., inplace=True) returns None, so assigning its result (as this
# cell used to) only stored None. Rebind ABC to the sorted copy instead, so
# later cells that take the first rows of ABC still get the top states.
ABC = ABC.sort_values('Voter Turnout in %', ascending=False)
In [25]:
# Keep the first ten rows of ABC, which the previous cell sorted by turnout, descending.
ABC_edited = ABC.iloc[:10]
In [26]:
# Display the ten-row selection used for the turnout map.
ABC_edited
Out[26]:
Abbr. Total Population Citizen Population Registered Voters Confirmed Voters Voter Turnout in %
5 CO 3817000 3543000 2635000 2495000 94.686907
48 WI 4351000 4247000 3318000 3128000 94.273659
23 MN 4054000 3903000 3085000 2859000 92.674230
50 WY 426000 419000 268000 247000 92.164179
8 DE 693000 642000 469000 430000 91.684435
30 NH 1029000 992000 752000 688000 91.489362
13 ID 1129000 1064000 744000 680000 91.397849
7 DC 517000 461000 385000 351000 91.168831
37 OR 2999000 2807000 2086000 1897000 90.939597
20 MD 4449000 4006000 2888000 2610000 90.373961
In [27]:
# Map only the states in ABC_edited, coloured by their turnout percentage.
fig = px.choropleth(
    ABC_edited,
    locations='Abbr.',
    locationmode='USA-states',
    color='Voter Turnout in %',
    hover_name='Abbr.',
    color_continuous_scale=px.colors.sequential.Electric,
    title='States with the highest turnout',
    scope="usa",
)
fig.update(layout_coloraxis_showscale=True)
fig.show()
In [ ]:
 

Dataset taken from the course resources at https://www.udemy.com/course/data-analysis-with-excel-pivot-tables/

Attributions: websites such as stackoverflow.com, plotlyexpress.com, geeksforgeeks.com, etc.