MATPLOT-LIB PACKAGE
CW - Pie, Donut, Nested Donut
matplotlib_pie

Data Visualisation Part II - with MATPLOT LIB

Pie Chart, Donut and Nested Donut charts

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use(['dark_background'])
import seaborn as sns
sns.set(color_codes = True)

Plotting with composition of data

  • for example, if there are a group of students who passed X class board exam, how many of them are boys and how many of them are girls
  • considering Covid dataset, how many of them are recovered, deceased, suffering....

Static composition

Pie Chart

In [2]:
pen = sns.load_dataset('penguins')
In [3]:
pen.head(2)
Out[3]:
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
In [4]:
pen.groupby('species').count()
Out[4]:
island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
species
Adelie 152 151 151 151 151 146
Chinstrap 68 68 68 68 68 68
Gentoo 124 123 123 123 123 119
In [5]:
pen.groupby('species')['species'].count()
Out[5]:
species
Adelie       152
Chinstrap     68
Gentoo       124
Name: species, dtype: int64
In [6]:
c = pen.groupby('species')['species'].count()
In [42]:
c
Out[42]:
species
Adelie       152
Chinstrap     68
Gentoo       124
Name: species, dtype: int64
In [7]:
plt.pie(c)
plt.show()
In [8]:
plt.pie(c, labels = c.index, autopct="%.2f%%")
plt.show()
In [9]:
plt.pie(c, labels = c.index, autopct="%.2f%%", explode=[0,1,0], startangle = 180)
plt.show()
In [10]:
plt.pie(np.random.randint(0, 10, 10), autopct="%.2f%%", pctdistance=1);

Donut chart

In [60]:
plt.pie(np.random.randint(0, 10, 10));
plt.show()
In [61]:
plt.pie(np.random.randint(1, 10, 10), wedgeprops=dict(width=0.25));

Matplotlib Color Map ( - Qualitative Color Maps)

https://matplotlib.org/stable/tutorials/colors/colormaps.html

In [94]:
cmap = plt.get_cmap('Paired')
my_colors = cmap(np.arange(10))
In [95]:
plt.pie(np.random.randint(1, 10, 10), 
        wedgeprops=dict(width=0.25), 
        colors = my_colors);
In [96]:
c
Out[96]:
species
Adelie       152
Chinstrap     68
Gentoo       124
Name: species, dtype: int64
In [97]:
plt.pie(c, labels = c.index, 
        autopct = "%.2f%%", 
        wedgeprops=dict(width=0.25), colors = my_colors);
In [98]:
c_i = pen.groupby('island')['island'].count()
c_i
Out[98]:
island
Biscoe       168
Dream        124
Torgersen     52
Name: island, dtype: int64
In [99]:
plt.pie(c_i, labels = c_i.index, autopct="%0.2f",
        wedgeprops = dict(width=0.3),
        colors = my_colors);
In [90]:
cmap = plt.get_cmap('tab10')
my_colors = cmap(np.arange(10))
In [93]:
plt.pie(c_i, autopct="%0.2f%%", labels = c_i.index, 
        wedgeprops=dict(width=0.3));

Crosstab in Pandas

In [101]:
pd.crosstab(pen.species, pen.island)
Out[101]:
island Biscoe Dream Torgersen
species
Adelie 44 56 52
Chinstrap 0 68 0
Gentoo 124 0 0
In [102]:
species_count = pd.crosstab(pen.species, pen.island)
In [103]:
species_count
Out[103]:
island Biscoe Dream Torgersen
species
Adelie 44 56 52
Chinstrap 0 68 0
Gentoo 124 0 0

to get islands on the outer rim

In [104]:
species_count = species_count.T
In [107]:
sc = species_count
In [110]:
sc # island as index and the corresponding cols as values
Out[110]:
species Adelie Chinstrap Gentoo
island
Biscoe 44 0 124
Dream 56 68 0
Torgersen 52 0 0
In [112]:
plt.pie(sc.sum(axis=1), labels = sc.index, 
        radius = 1, wedgeprops=dict(width=.3));
In [114]:
plt.pie(sc.sum(axis=1), labels = sc.index, 
        radius = 1, wedgeprops=dict(width=.3));
plt.pie(sc.values.flatten(), radius=0.7, wedgeprops = dict(width=0.3));
In [116]:
cmap = plt.get_cmap('tab20c')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1,2,35,6,7, 9, 10, 11]))
In [118]:
plt.pie(sc.sum(axis=1), labels = sc.index, 
        radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
plt.pie(sc.values.flatten(), radius=0.7, 
        wedgeprops = dict(width=0.3), colors = inner_colors);
In [122]:
plt.pie(sc.sum(axis=1), labels = sc.index, 
        radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
plt.pie(sc.values.flatten(), radius=0.7, 
        labels = ['A,', 'C', 'G', 'A,', 'C', 'G', 'A,', 'C', 'G' ],
        wedgeprops = dict(width=0.3), colors = inner_colors);
In [124]:
plt.pie(sc.sum(axis=1), labels = sc.index, 
        radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
plt.pie(sc.values.flatten(), radius=0.7, 
        labels = ['A,', '', 'G', 'A,', 'C', '', 'A,', '', '' ],
        wedgeprops = dict(width=0.3), colors = inner_colors,
        labeldistance = 0.8);
In [129]:
cmap = plt.get_cmap('tab20b')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1,2,35,6,7, 9, 10, 11]))

plt.pie(sc.sum(axis=1), labels = sc.index, 
        radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
plt.pie(sc.values.flatten(), radius=0.7, 
        labels = ['A,', '', 'G', 'A,', 'C', '', 'A,', '', '' ],
        wedgeprops = dict(width=0.3), colors = inner_colors,
        labeldistance = 0.8);
In [131]:
cmap = plt.get_cmap('tab20b')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1,2,35,6,7, 9, 10, 11]))

plt.pie(sc.sum(axis=1), labels = sc.index, 
        radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
plt.pie(sc.values.flatten(), radius=0.7, 
        labels = ['A,', '', 'G', 'A,', 'C', '', 'A,', '', '' ],
        wedgeprops = dict(width=0.3), colors = inner_colors,
        labeldistance = 0.8, 
        textprops = dict(color='w'));
In [19]:
import json
import urllib.request
In [20]:
url = 'https://api.covid19india.org/states_daily.json'
urllib.request.urlretrieve(url, 'data.json')

with open('data.json') as f:
    data = json.load(f)
data = data['states_daily']
df = pd.json_normalize(data)
In [24]:
df_ = df.tail(3)
In [25]:
df_
Out[25]:
an ap ar as br ch ct date dateymd dd ... sk status tg tn tr tt un up ut wb
1560 1 909 165 758 14 2 68 16-Aug-21 2021-08-16 0 ... 20 Confirmed 405 1851 52 24696 0 17 18 502
1561 0 1543 249 1014 42 3 224 16-Aug-21 2021-08-16 0 ... 147 Recovered 577 1911 223 36871 0 36 54 691
1562 0 13 0 10 0 0 1 16-Aug-21 2021-08-16 0 ... 0 Deceased 3 28 1 438 0 1 1 9

3 rows × 42 columns

In [28]:
df_.drop('dateymd', inplace=True, axis=1)
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:4913: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
In [29]:
df_.head(2)
Out[29]:
an ap ar as br ch ct date dd dl ... sk status tg tn tr tt un up ut wb
1560 1 909 165 758 14 2 68 16-Aug-21 0 27 ... 20 Confirmed 405 1851 52 24696 0 17 18 502
1561 0 1543 249 1014 42 3 224 16-Aug-21 0 73 ... 147 Recovered 577 1911 223 36871 0 36 54 691

2 rows × 41 columns

In [30]:
df_.drop('tt', inplace = True, axis=1)
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:4913: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
In [31]:
df_.head(2)
Out[31]:
an ap ar as br ch ct date dd dl ... rj sk status tg tn tr un up ut wb
1560 1 909 165 758 14 2 68 16-Aug-21 0 27 ... 11 20 Confirmed 405 1851 52 0 17 18 502
1561 0 1543 249 1014 42 3 224 16-Aug-21 0 73 ... 44 147 Recovered 577 1911 223 0 36 54 691

2 rows × 40 columns

In [32]:
df_.drop('date', axis=1, inplace=True)
df_.set_index('status', inplace = True)
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:4913: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
In [35]:
df_ = df_.T
In [36]:
df_
Out[36]:
an ap ar as br ch ct dd dl dn ... py rj sk tg tn tr un up ut wb
status
Confirmed 1 909 165 758 14 2 68 0 27 2 ... 49 11 20 405 1851 52 0 17 18 502
Recovered 0 1543 249 1014 42 3 224 0 73 0 ... 86 44 147 577 1911 223 0 36 54 691
Deceased 0 13 0 10 0 0 1 0 0 0 ... 0 0 0 3 28 1 0 1 1 9

3 rows × 38 columns

In [37]:
df_ = df_.apply(pd.to_numeric)
In [39]:
df_ = df_.T
In [40]:
df_.head(2)
Out[40]:
status Confirmed Recovered Deceased
an 1 0 0
ap 909 1543 13
In [134]:
!pip install nbconvert
In [ ]:
%shell jupyter nbconvert --to html /content/testfile.ipynb