gallery/data entry fields
PANDAS REPLACE f()
from PadhAI courseware
padhai_replace

Next Exercise

Replace words with their abbreviations

  • Modify the method column so that it holds only the abbreviation of each method
  • e.g. use 'RV' instead of 'Radial Velocity', and 'TR' instead of 'Transit'
In [ ]:
import numpy as np
import pandas as pd
import seaborn as sns
In [ ]:
# Load the 'planets' example dataset through seaborn.
planets = sns.load_dataset('planets')
In [ ]:
# NOTE(review): load_dataset presumably already returns a DataFrame, which
# would make this re-wrap redundant — confirm before removing.
planets = pd.DataFrame(planets)
In [ ]:
# Plain assignment: dfplanets is a second name for the same object as
# planets, not an independent copy.
dfplanets = planets
In [ ]:
dfplanets.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1035 entries, 0 to 1034
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   method          1035 non-null   object 
 1   number          1035 non-null   int64  
 2   orbital_period  992 non-null    float64
 3   mass            513 non-null    float64
 4   distance        808 non-null    float64
 5   year            1035 non-null   int64  
dtypes: float64(3), int64(2), object(1)
memory usage: 48.6+ KB
In [ ]:
dfplanets.method.unique()
Out[ ]:
array(['Radial Velocity', 'Imaging', 'Eclipse Timing Variations',
       'Transit', 'Astrometry', 'Transit Timing Variations',
       'Orbital Brightness Modulation', 'Microlensing', 'Pulsar Timing',
       'Pulsation Timing Variations'], dtype=object)
In [ ]:
s = 'Radial Velocity'
In [ ]:
s.split()
Out[ ]:
['Radial', 'Velocity']
In [ ]:
# First character of every whitespace-separated word in s.
[x[0] for x in s.split()]
Out[ ]:
['R', 'V']
In [ ]:
"".join([x[0] for x in s.split()])
Out[ ]:
'RV'
In [ ]:
s = 'Pulsation Timing Variations'
In [ ]:
"".join([x[0] for x in s.split()])
Out[ ]:
'PTV'

Apply the above procedure to every unique value of the method column in the DataFrame

In [ ]:
# Map every distinct method name to its abbreviation: the first letter of
# each whitespace-separated word, concatenated (e.g. 'Radial Velocity' -> 'RV').
# Separate names are used for the method and for its words so that the inner
# variable does not shadow the outer one.
short_names = {
    method: "".join(word[0] for word in method.split())
    for method in planets.method.unique()
}
In [ ]:
short_names
Out[ ]:
{'Astrometry': 'A',
 'Eclipse Timing Variations': 'ETV',
 'Imaging': 'I',
 'Microlensing': 'M',
 'Orbital Brightness Modulation': 'OBM',
 'Pulsar Timing': 'PT',
 'Pulsation Timing Variations': 'PTV',
 'Radial Velocity': 'RV',
 'Transit': 'T',
 'Transit Timing Variations': 'TTV'}
In [ ]:
# Row-by-row version: look up each row's method in short_names and store the
# result in a 'short_method' column; a method without an entry keeps its full
# name. A column-wise alternative appears in the "Pandas way" section below.
for idx, row in planets.iterrows():
    full_name = row['method']
    planets.loc[idx, 'short_method'] = short_names.get(full_name, full_name)
In [ ]:
planets.head()
Out[ ]:
method number orbital_period mass distance year short_method
0 Radial Velocity 1 269.300 7.10 77.40 2006 RV
1 Radial Velocity 1 874.774 2.21 56.95 2008 RV
2 Radial Velocity 1 763.000 2.60 19.84 2011 RV
3 Radial Velocity 1 326.030 19.40 110.62 2007 RV
4 Radial Velocity 1 516.220 10.50 119.47 2009 RV
In [ ]:
planets.tail()
Out[ ]:
method number orbital_period mass distance year short_method
1030 Transit 1 3.941507 NaN 172.0 2006 T
1031 Transit 1 2.615864 NaN 148.0 2007 T
1032 Transit 1 3.191524 NaN 174.0 2007 T
1033 Transit 1 4.125083 NaN 293.0 2008 T
1034 Transit 1 4.187757 NaN 260.0 2008 T

revise

In [ ]:
s = "Radial Velocity"
In [ ]:
s.split()
Out[ ]:
['Radial', 'Velocity']
In [ ]:
p = s.split()
In [ ]:
p
Out[ ]:
['Radial', 'Velocity']
In [ ]:
p[0]
Out[ ]:
'Radial'
In [ ]:
p[1]
Out[ ]:
'Velocity'
In [ ]:
[x[0] for x in p]
Out[ ]:
['R', 'V']
In [ ]:
"".join([x[0] for x in p])
Out[ ]:
'RV'
In [ ]:
# str.join places the separator between consecutive items of one iterable.
# The list is named `letters` rather than `str` so that the built-in str type
# is not shadowed for the rest of the session.
s = '-'
letters = ['a', 'b', 'c']
s.join(letters)
Out[ ]:
'a-b-c'
In [ ]:
s = '-'
str1 = ['a', 'b','c']
str2 = ['1', '2', '3']
s.join(str1, str2)
# ^ join() takes exactly one iterable argument, so passing two lists raises
#   the TypeError recorded below. The comment sits after the call so the
#   line number shown in the traceback still matches.
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-27-bb78c376e328> in <module>()
      2 str1 = ['a', 'b','c']
      3 str2 = ['1', '2', '3']
----> 4 s.join(str1, str2)

TypeError: join() takes exactly one argument (2 given)
In [ ]:
planets.method.unique()
Out[ ]:
array(['Radial Velocity', 'Imaging', 'Eclipse Timing Variations',
       'Transit', 'Astrometry', 'Transit Timing Variations',
       'Orbital Brightness Modulation', 'Microlensing', 'Pulsar Timing',
       'Pulsation Timing Variations'], dtype=object)
In [ ]:
s = 'Orbital Brightness Modulation'
In [ ]:
s.split()
Out[ ]:
['Orbital', 'Brightness', 'Modulation']
In [ ]:
s.split()[1]
Out[ ]:
'Brightness'
In [ ]:
[x[0] for x in s.split()]
Out[ ]:
['O', 'B', 'M']
In [ ]:
"".join(x[0] for x in s.split())
Out[ ]:
'OBM'
In [ ]:
# NOTE: this rebinds short_names. Earlier in the notebook it held the
# name -> abbreviation dictionary; from here on it is the array of full
# method names returned by unique(), which the following cells index.
short_names = planets.method.unique()
In [ ]:
# print() receives a generator expression here, so it shows the generator
# object itself rather than any values; x[i] is never evaluated.
# Printing planets.method.unique() directly lists the values instead.
print(x[i] for i in planets.method.unique())
<generator object <genexpr> at 0x7f4a5b2bde50>
In [ ]:
print(short_names)
['Radial Velocity' 'Imaging' 'Eclipse Timing Variations' 'Transit'
 'Astrometry' 'Transit Timing Variations' 'Orbital Brightness Modulation'
 'Microlensing' 'Pulsar Timing' 'Pulsation Timing Variations']
In [ ]:
short_names[1]
Out[ ]:
'Imaging'
In [ ]:
short_names[2].split()
Out[ ]:
['Eclipse', 'Timing', 'Variations']
In [ ]:
"".join([x[0] for x in short_names[5].split()])
Out[ ]:
'TTV'
  • Start with an empty dictionary that will map each long form to its short form
  • Take each element of planets.method.unique() - say 's'
  • Split s into words and take the first character of each word
  • Join those characters to form the abbreviation
In [ ]:
# planets_dic: full method name -> abbreviation built from its word initials.
planets_dic = {}
for s in planets.method.unique():
    initials = [word[0] for word in s.split()]
    planets_dic[s] = "".join(initials)
In [ ]:
planets_dic
Out[ ]:
{'Astrometry': 'A',
 'Eclipse Timing Variations': 'ETV',
 'Imaging': 'I',
 'Microlensing': 'M',
 'Orbital Brightness Modulation': 'OBM',
 'Pulsar Timing': 'PT',
 'Pulsation Timing Variations': 'PTV',
 'Radial Velocity': 'RV',
 'Transit': 'T',
 'Transit Timing Variations': 'TTV'}
In [ ]:
# Fill an 'abbrev' column one row at a time from planets_dic; a method that is
# missing from the dictionary is left as its full name.
for row_label, record in planets.iterrows():
    method_name = record['method']
    planets.loc[row_label, "abbrev"] = planets_dic.get(method_name, method_name)
In [ ]:
planets.head()
Out[ ]:
method number orbital_period mass distance year abbrev
0 Radial Velocity 1 269.300 7.10 77.40 2006 RV
1 Radial Velocity 1 874.774 2.21 56.95 2008 RV
2 Radial Velocity 1 763.000 2.60 19.84 2011 RV
3 Radial Velocity 1 326.030 19.40 110.62 2007 RV
4 Radial Velocity 1 516.220 10.50 119.47 2009 RV
In [ ]:
planets.tail()
Out[ ]:
method number orbital_period mass distance year abbrev
1030 Transit 1 3.941507 NaN 172.0 2006 T
1031 Transit 1 2.615864 NaN 148.0 2007 T
1032 Transit 1 3.191524 NaN 174.0 2007 T
1033 Transit 1 4.125083 NaN 293.0 2008 T
1034 Transit 1 4.187757 NaN 260.0 2008 T
In [ ]:
# Same row-wise lookup in planets_dic, written into a column named 'abbre'
# (that column is dropped again in a later cell).
for label, row in planets.iterrows():
    long_form = row['method']
    short_form = planets_dic.get(long_form, long_form)
    planets.loc[label, "abbre"] = short_form
In [ ]:
planets.head()
Out[ ]:
method number orbital_period mass distance year abbrev abbre
0 Radial Velocity 1 269.300 7.10 77.40 2006 RV RV
1 Radial Velocity 1 874.774 2.21 56.95 2008 RV RV
2 Radial Velocity 1 763.000 2.60 19.84 2011 RV RV
3 Radial Velocity 1 326.030 19.40 110.62 2007 RV RV
4 Radial Velocity 1 516.220 10.50 119.47 2009 RV RV
In [ ]:
# Remove the 'abbre' column from planets in place.
planets.drop(columns='abbre', inplace=True)
In [ ]:
planets.head()
Out[ ]:
method number orbital_period mass distance year abbrev
0 Radial Velocity 1 269.300 7.10 77.40 2006 RV
1 Radial Velocity 1 874.774 2.21 56.95 2008 RV
2 Radial Velocity 1 763.000 2.60 19.84 2011 RV
3 Radial Velocity 1 326.030 19.40 110.62 2007 RV
4 Radial Velocity 1 516.220 10.50 119.47 2009 RV

Pandas way of doing the above activity

  • Creating the dictionary is a step common to both approaches
In [ ]:
# Load the dataset again under a new name, so this section works on a frame
# separate from the `planets` frame modified above.
df_planets = sns.load_dataset('planets')
In [ ]:
method_list = df_planets.method.unique()
In [ ]:
method_list
Out[ ]:
array(['Radial Velocity', 'Imaging', 'Eclipse Timing Variations',
       'Transit', 'Astrometry', 'Transit Timing Variations',
       'Orbital Brightness Modulation', 'Microlensing', 'Pulsar Timing',
       'Pulsation Timing Variations'], dtype=object)
In [ ]:
# Abbreviation lookup: each distinct method name -> its word initials joined.
method_dic = {
    name: "".join(word[0] for word in name.split())
    for name in method_list
}
In [ ]:
method_dic
Out[ ]:
{'Astrometry': 'A',
 'Eclipse Timing Variations': 'ETV',
 'Imaging': 'I',
 'Microlensing': 'M',
 'Orbital Brightness Modulation': 'OBM',
 'Pulsar Timing': 'PT',
 'Pulsation Timing Variations': 'PTV',
 'Radial Velocity': 'RV',
 'Transit': 'T',
 'Transit Timing Variations': 'TTV'}
In [ ]:
def method_shorts(s):
    """Return the abbreviation stored in method_dic for *s*.

    If *s* has no entry in method_dic, *s* itself is returned unchanged.
    """
    if s in method_dic:
        return method_dic[s]
    return s
In [ ]:
# Column-wise version: run method_shorts on every value of 'method' and store
# the resulting Series as the 'short_method' column.
abbreviated = df_planets['method'].apply(method_shorts)
df_planets['short_method'] = abbreviated
In [ ]:
df_planets
Out[ ]:
method number orbital_period mass distance year short_method
0 Radial Velocity 1 269.300000 7.10 77.40 2006 RV
1 Radial Velocity 1 874.774000 2.21 56.95 2008 RV
2 Radial Velocity 1 763.000000 2.60 19.84 2011 RV
3 Radial Velocity 1 326.030000 19.40 110.62 2007 RV
4 Radial Velocity 1 516.220000 10.50 119.47 2009 RV
... ... ... ... ... ... ... ...
1030 Transit 1 3.941507 NaN 172.00 2006 T
1031 Transit 1 2.615864 NaN 148.00 2007 T
1032 Transit 1 3.191524 NaN 174.00 2007 T
1033 Transit 1 4.125083 NaN 293.00 2008 T
1034 Transit 1 4.187757 NaN 260.00 2008 T

1035 rows × 7 columns

Next Task

  • count of planets discovered per method type