Importing Libraries¶
In [1]:
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import matplotlib.pyplot as plt
import seaborn;
%matplotlib inline
Reading data¶
In [2]:
def date_parser(date):
    """Convert day-first ``DD/MM/YYYY`` text into pandas datetimes.

    Accepts a single string or a whole column/array of strings and raises
    ``ValueError`` when a value does not match the format.
    """
    return pd.to_datetime(date, format='%d/%m/%Y')


# `pd.datetime` was removed in pandas 2.0 and read_csv's `date_parser` argument is
# deprecated, so the dates are read as text and converted explicitly afterwards.
df = pd.read_csv('../data/startup_funding.csv', thousands=',')
df['Date'] = date_parser(df['Date'])
# Drop the 'SNo' column (presumably just a row serial number - confirm against the file).
df = df.drop(columns=['SNo'])
Data preview¶
In [3]:
# Show the first few rows of the loaded DataFrame.
df.head()
Out[3]:
In [4]:
# Two stacked panels: total funding per (year, quarter) on top and per (year, month) below.
fig, axes = plt.subplots(2, 1, figsize=(16, 8))
fig.suptitle('Investment over time', fontsize=24, fontweight='bold')

# Each entry: (target axes, sub-year grouping key, x-axis label).
panels = [
    (axes[0], df['Date'].dt.quarter, 'Year-Quarter No.'),
    (axes[1], df['Date'].dt.month, 'Year-Month'),
]
for ax, period, xlabel in panels:
    # Raw USD totals for every (year, period) pair.
    ts1 = df.groupby([df['Date'].dt.year, period])['AmountInUSD'].sum()
    # Scale to billions at plot time so the values agree with the y-axis label;
    # `ts1` itself stays in raw USD. Passing `ax=` avoids relying on pyplot's
    # implicit "current axes".
    (ts1 / 1e9).plot(ax=ax, linewidth=4, marker="o", markersize=15, markerfacecolor='green')
    ax.set_ylabel('USD in Billions')
    # Set after plotting so it overrides the label pandas derives from the index.
    ax.set_xlabel(xlabel)
Out[4]:
Grouping by the DataFrame index¶
In [5]:
# Use the funding date as the row index. The guard keeps the cell re-runnable:
# once 'Date' has moved into the index it is no longer a column, and calling
# set_index on it a second time would raise KeyError.
if 'Date' in df.columns:
    df = df.set_index('Date')
df.head()
Out[5]:
In [6]:
# Total amount invested on each distinct funding date, as a Series keyed by 'Date'.
amount_by_date = df.groupby('Date')['AmountInUSD']
ts = amount_by_date.sum()
In [7]:
# Show the first few entries of the per-date investment totals.
ts.head()
Out[7]: