The matplotlip's intro from blakezara

##Analysis:

#1. Average Fares are higher in rural areas.

#2. Urban areas had more drivers, rides and fare overall.

#3. Suburban areas have a higher number of rides than rural areas but fewer than urban areas.

# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the two raw CSV exports and join them on the shared "city" key.

ride_data = pd.read_csv("raw_data/ride_data.csv")
city_data = pd.read_csv("raw_data/city_data.csv")

# Outer join: rows from either file are kept even when the city has no
# counterpart in the other file.
merged = city_data.merge(ride_data, on='city', how='outer')
merged.head()

# Relabel the columns with presentation-friendly names. The assignment is
# positional, so it relies on the merged frame having exactly these six
# columns in this order.
display_columns = ["City", "Driver Count", "Type", "Date", "Fare", "Ride ID"]
merged.columns = display_columns


merged.head()

<style>
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	City	Driver Count	Type	Date	Fare	Ride ID
0	Kelseyland	63	Urban	2016-08-19 04:27:52	5.51	6246006544795
1	Kelseyland	63	Urban	2016-04-17 06:59:50	5.54	7466473222333
2	Kelseyland	63	Urban	2016-05-04 15:06:07	30.54	2140501382736
3	Kelseyland	63	Urban	2016-01-25 20:44:56	12.08	1896987891309
4	Kelseyland	63	Urban	2016-08-09 18:19:47	17.91	8784212854829

# Build one summary row per (City, Type, Driver Count) combination.
merged_data = merged.groupby(["City", "Type", "Driver Count"])

# Mean fare per group, already carrying its final column label.
average_fare = merged_data["Fare"].mean().rename("Average Fare")

# Number of distinct ride identifiers per group.
total_rides = merged_data["Ride ID"].nunique().rename("Total Rides")

# Place the two metrics side by side, then turn the group keys back into
# ordinary columns.
new_data = pd.concat([average_fare, total_rides], axis=1).reset_index()

new_data.head()

<style>
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	City	Type	Driver Count	Average Fare	Total Rides
0	Alvarezhaven	Urban	21	23.928710	31
1	Alyssaberg	Urban	67	20.609615	26
2	Anitamouth	Suburban	16	37.315556	9
3	Antoniomouth	Urban	21	23.625000	22
4	Aprilchester	Urban	49	21.981579	19

# Bubble plot: average fare against total rides, one marker per city,
# coloured by city type.
#
# Marker area is taken from each city's "Driver Count". The earlier fixed
# np.arange(0, 1000, 10) sizes were unrelated to the data and always had
# 100 entries, which need not match the number of cities drawn for each
# type (matplotlib expects `s` to be a scalar or the same size as x and y).
# NOTE(review): sizing by driver count is assumed to be the intent, since
# that column is carried into new_data but otherwise unused here -- confirm.
BUBBLE_SCALE = 10  # marker area (points^2) per driver; cosmetic only
size = new_data["Driver Count"] * BUBBLE_SCALE

figure, axes = plt.subplots()
for city_type, cities in new_data.groupby("Type"):
    axes.scatter(
        cities["Total Rides"],
        cities["Average Fare"],
        s=size.loc[cities.index],  # sizes aligned to this type's rows
        alpha=0.50,
        label=city_type,
    )

axes.set_ylim(15, 45)
axes.set_xlim(0, 40)
axes.set_xlabel('Total Number of Rides Per City')
axes.set_ylabel('Average Fare')
axes.legend(title='Type')

axes.set_title('Pyber Rideshare Data')

plt.show()

#Percent of Total Fares by City Type
# Select the value columns with a list. Tuple-style selection
# (groupby(...)['a', 'b']) is deprecated and rejected by newer pandas, and
# the grouping key "Type" does not need to be selected again.
type_data = merged.groupby("Type")[["Fare", "Ride ID", "Driver Count"]]

# Total fare collected per city type.
fare = type_data.sum()["Fare"]
fare

labels = fare.index
# Offset only the first wedge. The list is built from the number of city
# types because plt.pie requires explode to have one entry per wedge.
explode = [.3 if position == 0 else 0 for position in range(len(fare))]

plt.pie(fare, startangle = 140, explode = explode, labels = labels,
        autopct = "%1.1f%%",
        shadow = True,
        wedgeprops = {'linewidth': .2, 'edgecolor': 'black'})

plt.title("Percentage of Total Fares by City Type")
plt.show()

# Share of all rides contributed by each city type.

# Non-null entries per column within each type; the "Ride ID" column of
# that table is the ride count.
per_type_counts = type_data.count()
rides = per_type_counts["Ride ID"]
rides

explode = [.3 , 0, 0]
labels = rides.index

wedge_outline = {'linewidth': .5, 'edgecolor': 'black'}
plt.pie(
    rides,
    explode=explode,
    labels=labels,
    autopct="%1.1f%%",
    shadow=True,
    startangle=140,
    wedgeprops=wedge_outline,
)

plt.title("Percentage of Total Rides by City Type")

plt.show()

#Percentage of Total Drivers by City Type

# `merged` holds one row per ride, with the city's driver count repeated on
# every row. Summing that column directly counts each city's drivers once
# per ride, so first reduce to one row per distinct (City, Type, Driver
# Count) combination and only then total by type.
city_drivers = merged[["City", "Type", "Driver Count"]].drop_duplicates()
drivers = city_drivers.groupby("Type")["Driver Count"].sum()
drivers

# Label the wedges from the series actually being plotted.
labels = drivers.index
# Offset only the first wedge; plt.pie needs one explode entry per wedge.
explode = [.3 if position == 0 else 0 for position in range(len(drivers))]

plt.pie(drivers, startangle = 140, explode = explode, labels = labels, autopct = "%1.1f%%", shadow = True, wedgeprops = {'linewidth': .5, 'edgecolor': 'black'})

plt.title("Percentage of Total Drivers by City Type")
plt.show()

blakezara / matplotlip Goto Github PK

matplotlip's Introduction

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent