##Analysis:
#1. Average Fares are higher in rural areas.
#2. Urban areas had more drivers, rides and fare overall.
#3. Suburban areas have a higher number of rides than rural areas but fewer than urban areas.
# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the raw city and ride tables, then join them on the shared "city" key.
city_data = pd.read_csv("raw_data/city_data.csv")
ride_data = pd.read_csv("raw_data/ride_data.csv")
# how="outer" keeps rows from either table even when the other has no match.
merged = city_data.merge(ride_data, how="outer", on="city")
merged.head()
# Positional relabel: assumes the merged frame has exactly these six columns
# in this order (city columns first, then ride columns) -- verify if the
# input files change.
display_names = ["City", "Driver Count", "Type", "Date", "Fare", "Ride ID"]
merged.columns = display_names
merged.head()
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
City | Driver Count | Type | Date | Fare | Ride ID | |
---|---|---|---|---|---|---|
0 | Kelseyland | 63 | Urban | 2016-08-19 04:27:52 | 5.51 | 6246006544795 |
1 | Kelseyland | 63 | Urban | 2016-04-17 06:59:50 | 5.54 | 7466473222333 |
2 | Kelseyland | 63 | Urban | 2016-05-04 15:06:07 | 30.54 | 2140501382736 |
3 | Kelseyland | 63 | Urban | 2016-01-25 20:44:56 | 12.08 | 1896987891309 |
4 | Kelseyland | 63 | Urban | 2016-08-09 18:19:47 | 17.91 | 8784212854829 |
# Per-city summary: one row per (City, Type, Driver Count) combination.
merged_data = merged.groupby(["City", "Type", "Driver Count"])
# Build both aggregates in a single frame, already carrying their final names:
#   Average Fare -> mean of "Fare" within the group
#   Total Rides  -> number of distinct "Ride ID" values within the group
new_data = pd.DataFrame(
    {
        "Average Fare": merged_data["Fare"].mean(),
        "Total Rides": merged_data["Ride ID"].nunique(),
    }
)
# Turn the group keys back into ordinary columns.
new_data = new_data.reset_index()
new_data.head()
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
City | Type | Driver Count | Average Fare | Total Rides | |
---|---|---|---|---|---|
0 | Alvarezhaven | Urban | 21 | 23.928710 | 31 |
1 | Alyssaberg | Urban | 67 | 20.609615 | 26 |
2 | Anitamouth | Suburban | 16 | 37.315556 | 9 |
3 | Antoniomouth | Urban | 21 | 23.625000 | 22 |
4 | Aprilchester | Urban | 49 | 21.981579 | 19 |
# Bubble chart: average fare vs. total rides per city, coloured by city type,
# with each bubble's area proportional to that city's driver count.
#
# The marker sizes used to come from a fixed 100-element ramp
# (np.arange(0, 1000, 10)) that had no relation to the data and did not match
# the number of points drawn for each city type. Sizes are now derived from
# the "Driver Count" column so they line up with the rows being plotted.
BUBBLE_SCALE = 10  # marker area (points^2) per driver
plot_data = new_data.assign(
    **{"Bubble Size": new_data["Driver Count"] * BUBBLE_SCALE}
)
figure = sns.FacetGrid(plot_data, hue="Type", height=5)
# FacetGrid.map passes the named columns positionally, so for plt.scatter the
# third column becomes `s` (marker area) for each city-type subset.
figure.map(plt.scatter, "Total Rides", "Average Fare", "Bubble Size", alpha=0.50)
figure.add_legend()
plt.ylim(15, 45)
plt.xlim(0, 40)
plt.xlabel('Total Number of Rides Per City')
plt.ylabel('Average Fare')
plt.title('Pyber Rideshare Data')
plt.show()
#Percent of Total Fares by City Type
# Group the ride-level rows by city type. The columns are selected with a
# list: indexing a GroupBy with a bare tuple of names is rejected by current
# pandas. "Type" is the grouping key, so it is not selected again.
type_data = merged.groupby("Type")[["Fare", "Ride ID", "Driver Count"]]
# Total fare collected per city type.
fare = type_data.sum()["Fare"]
fare
labels = fare.index
# Pull out only the first wedge; sized from the data so the list always
# matches the number of city types present.
explode = [0.3 if position == 0 else 0 for position in range(len(fare))]
plt.pie(fare, startangle = 140, explode = explode, labels = labels,
autopct = "%1.1f%%",
shadow = True,
wedgeprops = {'linewidth': .2, 'edgecolor': 'black'})
plt.title("Percentage of Total Fares by City Type")
plt.show()
#Percentage of Total Rides by City Type
# Count the non-null "Ride ID" entries within each city type.
counts_by_type = type_data.count()
rides = counts_by_type["Ride ID"]
rides
labels = rides.index
# Offset the first wedge only.
explode = [0.3, 0, 0]
wedge_style = {'linewidth': 0.5, 'edgecolor': 'black'}
plt.pie(
    rides,
    explode=explode,
    labels=labels,
    autopct="%1.1f%%",
    shadow=True,
    startangle=140,
    wedgeprops=wedge_style,
)
plt.title("Percentage of Total Rides by City Type")
plt.show()
#Percentage of Total Drivers by City Type
# "Driver Count" is a per-city figure that the merge repeats on every ride
# row. Summing it over the ride-level rows would count each city's drivers
# once per ride, so collapse to one row per distinct city record first.
city_records = merged.drop_duplicates(subset=["City", "Type", "Driver Count"])
drivers = city_records.groupby("Type")["Driver Count"].sum()
drivers
# Label the wedges from the series actually being plotted.
labels = drivers.index
# Pull out only the first wedge; sized from the data so the list always
# matches the number of city types present.
explode = [0.3 if position == 0 else 0 for position in range(len(drivers))]
plt.pie(drivers, startangle = 140, explode = explode, labels = labels, autopct = "%1.1f%%", shadow = True, wedgeprops = {'linewidth': .5, 'edgecolor': 'black'})
plt.title("Percentage of Total Drivers by City Type")
plt.show()