Pandas 1

# Import all libraries needed for the tutorial

# General syntax to import specific functions in a library: 
##from (library) import (specific library function)
from pandas import DataFrame, read_csv

# General syntax to import a library but no functions: 
##import (library) as (give the library a nickname/alias)
import matplotlib.pyplot as plt
import pandas as pd #this is how I usually import pandas
import sys #only needed to determine Python version number
import matplotlib #only needed to determine Matplotlib version number

# Enable inline plotting
%matplotlib inline

# Report the versions this notebook was run with, one line per library
for label, version in (
    ('Python version ', sys.version),
    ('Pandas version ', pd.__version__),
    ('Matplotlib version ', matplotlib.__version__),
):
    print(label + version)
Python version 3.5.1 |Anaconda custom (64-bit)| (default, Feb 16 2016, 09:49:46) [MSC v.1900 64 bit (AMD64)]
Pandas version 0.19.2
Matplotlib version 1.5.1

 

# The initial set of baby names and their birth counts.
# The two lists are parallel: births[i] belongs to names[i].
names = [
    'Bob',
    'Jessica',
    'Mary',
    'John',
    'Mel',
]
births = [
    968,
    155,
    77,
    578,
    973,
]

To merge these two lists together we will use the zip function.

# zip pairs the i-th name with the i-th birth count; in Python 3 it returns
# an iterator, so list() is needed to materialise the (name, births) tuples
BabyDataSet = list(zip(names,births))
BabyDataSet
Out[5]:
[('Bob', 968), ('Jessica', 155), ('Mary', 77), ('John', 578), ('Mel', 973)]
# Build a two-column frame from the (name, births) tuples; each tuple
# becomes one row and the columns are labelled explicitly
df = pd.DataFrame(BabyDataSet, columns=['Names', 'Births'])
df
Out[6]:
Names Births
0 Bob 968
1 Jessica 155
2 Mary 77
3 John 578
4 Mel 973
# Use a single relative path for both the write and the read so the round
# trip always targets the same file and the notebook runs on any machine
# (a hard-coded absolute path only works on the original author's computer).
Location = 'births1880.csv'

# Export the raw data only: no index column and no header row
df.to_csv(Location, index=False, header=False)

# The file has no header row, so supply the column names when reading it back
df = pd.read_csv(Location, names=['Names', 'Births'])
df
Out[13]:
Names Births
0 Bob 968
1 Jessica 155
2 Mary 77
3 John 578
4 Mel 973
# Check the data type of every column (one dtype per column is returned)
df.dtypes
Out[15]:
Names     object
Births     int64
dtype: object

 

# Check the data type of the Births column only
df['Births'].dtype
Out[16]:
dtype('int64')

 

To find the most popular name or the baby name with the highest birth rate, we can do one of the following.

# Method 1: order the rows by Births, largest first, then keep the top row
Sorted = df.sort_values(by='Births', ascending=False)
Sorted.head(1)
Out[17]:
Names Births
4 Mel 973
# Method 2: take the maximum of the Births column directly
# (this yields only the value, not the name that goes with it)
df['Births'].max()
Out[18]:
973

 

# Create graph; keep the returned Axes so the annotation below is attached
# to this plot explicitly instead of to pyplot's implicit "current" axes
ax = df['Births'].plot()

# Maximum value in the data set
MaxValue = df['Births'].max()

# Name(s) associated with the maximum value. This is an array, because
# more than one name can share the maximum.
MaxName = df['Names'][df['Births'] == MaxValue].values

# Text to display on graph. Join the names first so Text is a plain string;
# concatenating a str with the array directly would make Text an array.
Text = str(MaxValue) + " - " + ", ".join(MaxName)

# Add text to graph: x is given as a fraction of the axes width (1 = right
# edge), y in data units (the maximum), then shifted 8 points to the right
ax.annotate(Text, xy=(1, MaxValue), xytext=(8, 0),
            xycoords=('axes fraction', 'data'), textcoords='offset points')

print("The most popular name")
df[df['Births'] == MaxValue]
#Sorted.head(1) can also be used
The most popular name
Out[19]:
Names Births
4 Mel 973

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s