udacity / artificial-intelligence
License: MIT License
#Import the standard Python Scientific Libraries
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
#Import Plotly and use it in the Offline Mode
import plotly
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.tools as tls
import plotly.graph_objs as go
import plotly.figure_factory as fig_fact
plotly.tools.set_config_file(world_readable=True, sharing='public')
#Suppress Deprecation and Incorrect Usage Warnings
import warnings
warnings.filterwarnings('ignore')
mcq = pd.read_csv('multipleChoiceResponses.csv', encoding="ISO-8859-1", low_memory=False)
mcq.shape
#Load Free Form Responses into a Pandas DataFrame
ff = pd.read_csv('freeformResponses.csv', encoding="ISO-8859-1", low_memory=False)
ff.shape
sns.countplot(y='GenderSelect', data=mcq)
#Create a DataFrame for number of respondents by country
con_df = pd.DataFrame(mcq['Country'].value_counts())
con_df['country'] = con_df.index
con_df.columns = ['num_resp', 'country']
con_df = con_df.reset_index().drop('index', axis=1)
con_df.head(10)
#Create a Choropleth Map of the respondents using Plotly.
#Find out more at https://plot.ly/python/choropleth-maps/
data = [ dict(
        type = 'choropleth',
        locations = con_df['country'],
        locationmode = 'country names',
        z = con_df['num_resp'],
        text = con_df['country'],
        colorscale = [[0,'rgb(255, 255, 255)'],[1,'rgb(56, 142, 60)']],
        autocolorscale = False,
        reversescale = False,
        marker = dict(
            line = dict(
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'Survey Respondents'),
      ) ]
layout = dict(
    title = 'Survey Respondents by Nationality',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            type = 'mercator'
        )
    )
)
fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False, filename='survey-world-map')
#Get Summary Statistics of the Respondents' Ages.
mcq['Age'].describe()
#Plot the Age distribution
fig = fig_fact.create_distplot([mcq[mcq['Age'] > 0]['Age']], ['age'], colors=['#BA68C8'])
py.iplot(fig, filename='Basic Distplot')
#sns.distplot(mcq[mcq['Age'] > 0]['Age'])
sns.countplot(y='FormalEducation', data=mcq)
plt.figure(figsize=(6,8))
sns.countplot(y='MajorSelect', data=mcq)
sns.countplot(y='EmploymentStatus', data=mcq)
sns.countplot(y='Tenure', data=mcq)
sns.countplot(y='LanguageRecommendationSelect', data=mcq)
top_lang = mcq['LanguageRecommendationSelect'].value_counts()
top_lang_dist = []
for lang in top_lang.index:
    top_lang_dist.append(mcq[(mcq['Age'].notnull()) & (mcq['LanguageRecommendationSelect'] == lang)]['Age'])
group_labels = top_lang.index
fig = fig_fact.create_distplot(top_lang_dist, group_labels, show_hist=False)
py.iplot(fig, filename='Language Preferences by Age')
mcq[mcq['CurrentJobTitleSelect'].notnull()]['CurrentJobTitleSelect'].shape
#Plot the number of R and Python users by Occupation
data = mcq[(mcq['CurrentJobTitleSelect'].notnull()) & ((mcq['LanguageRecommendationSelect'] == 'Python') | (mcq['LanguageRecommendationSelect'] == 'R'))]
plt.figure(figsize=(8, 10))
sns.countplot(y="CurrentJobTitleSelect", hue="LanguageRecommendationSelect", data=data)
#Render a bar plot of the 15 most popular ML Tools for next year
data = mcq['MLToolNextYearSelect'].value_counts().head(15)
sns.barplot(y=data.index, x=data)
data = mcq['MLMethodNextYearSelect'].value_counts().head(15)
sns.barplot(y=data.index, x=data)
#Explode the Pandas Dataframe to get the number of times each Learning Platform was mentioned
mcq['LearningPlatformSelect'] = mcq['LearningPlatformSelect'].astype('str').apply(lambda x: x.split(','))
s = mcq.apply(lambda x: pd.Series(x['LearningPlatformSelect']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'platform'
plt.figure(figsize=(6,8))
data = s[s != 'nan'].value_counts()
sns.barplot(y=data.index, x=data)
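The apply/stack idiom above works but builds a full intermediate DataFrame and can be slow on large surveys. On pandas 0.25 or newer, Series.explode performs the same one-row-per-selection expansion in a single step; a minimal sketch, assuming the column has already been split into lists as above:
#Equivalent expansion with Series.explode (pandas >= 0.25)
s_alt = mcq['LearningPlatformSelect'].explode()
data_alt = s_alt[s_alt != 'nan'].value_counts()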
use_features = [x for x in mcq.columns if x.find('LearningPlatformUsefulness') != -1]
#Construct a Pandas DataFrame to illustrate the usefulness of various learning platforms.
fdf = {}
for feature in use_features:
    a = mcq[feature].value_counts()
    a = a/a.sum()
    fdf[feature[len('LearningPlatformUsefulness'):]] = a
fdf = pd.DataFrame(fdf).transpose()#.sort_values('Very useful', ascending=False)
#Plot a Heatmap of Learning Platform Usefulness
plt.figure(figsize=(6,12))
sns.heatmap(fdf.sort_values("Very useful", ascending=False), annot=True)
#Plot a grouped barplot of Learning Platform Usefulness
fdf.plot(kind='bar', figsize=(18,8), title="Usefulness of Learning Platforms")
plt.show()
cat_features = [x for x in mcq.columns if x.find('LearningCategory') != -1]
cdf = {}
for feature in cat_features:
    cdf[feature[len('LearningCategory'):]] = mcq[feature].mean()
cdf = pd.Series(cdf)
#Plot a Pie Chart of the contribution of each platform to learning
plt.pie(cdf, labels=cdf.index,
autopct='%1.1f%%', shadow=True, startangle=140)
plt.axis('equal')
plt.title("Contribution of each Platform to Learning")
plt.show()
mcq[mcq['HardwarePersonalProjectsSelect'].notnull()]['HardwarePersonalProjectsSelect'].shape
mcq['HardwarePersonalProjectsSelect'] = mcq['HardwarePersonalProjectsSelect'].astype('str').apply(lambda x: x.split(','))
s = mcq.apply(lambda x: pd.Series(x['HardwarePersonalProjectsSelect']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'hardware'
s[s != 'nan'].value_counts()
plt.figure(figsize=(8,8))
sns.countplot(y='TimeSpentStudying', data=mcq, hue='EmploymentStatus').legend(loc='center left', bbox_to_anchor=(1, 0.5))
mcq['BlogsPodcastsNewslettersSelect'] = mcq['BlogsPodcastsNewslettersSelect'].astype('str').apply(lambda x: x.split(','))
s = mcq.apply(lambda x: pd.Series(x['BlogsPodcastsNewslettersSelect']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'platforms'
s = s[s != 'nan'].value_counts()
plt.figure(figsize=(6,8))
plt.title("Most Popular Blogs and Podcasts")
sns.barplot(y=s.index, x=s)
mcq['CoursePlatformSelect'] = mcq['CoursePlatformSelect'].astype('str').apply(lambda x: x.split(','))
t = mcq.apply(lambda x: pd.Series(x['CoursePlatformSelect']),axis=1).stack().reset_index(level=1, drop=True)
t.name = 'courses'
t = t[t != 'nan'].value_counts()
plt.title("Most Popular Course Platforms")
sns.barplot(y=t.index, x=t)
job_features = [x for x in mcq.columns if x.find('JobSkillImportance') != -1 and x.find('JobSkillImportanceOther') == -1]
#Get a Pandas DataFrame of Skill Importance of Data Science Jobs
jdf = {}
for feature in job_features:
    a = mcq[feature].value_counts()
    a = a/a.sum()
    jdf[feature[len('JobSkillImportance'):]] = a
jdf = pd.DataFrame(jdf).transpose()
jdf.plot(kind='bar', figsize=(12,6), title="Skill Importance in Data Science Jobs")
mcq[mcq['CompensationAmount'].notnull()].shape
#Convert all salary values to float. If not possible, convert it to NaN
def clean_salary(x):
    try:
        return float(str(x).replace(',', ''))
    except ValueError:
        return np.nan
#Function that outputs salary statistics and plots a salary distribution for that country
def salary_stats(country):
    #Work on a copy to avoid pandas SettingWithCopyWarning
    data = mcq[(mcq['CompensationAmount'].notnull()) & (mcq['Country'] == country)].copy()
    data['CompensationAmount'] = data['CompensationAmount'].apply(clean_salary)
    #Ignore implausible outliers above 1e9 when summarizing and plotting
    print(data[data['CompensationAmount'] < 1e9]['CompensationAmount'].describe())
    sns.distplot(data[data['CompensationAmount'] < 1e9]['CompensationAmount'])
salary_stats('India')
salary_stats('United States')
mcq['PublicDatasetsSelect'] = mcq['PublicDatasetsSelect'].astype('str').apply(lambda x: x.split(','))
q = mcq.apply(lambda x: pd.Series(x['PublicDatasetsSelect']),axis=1).stack().reset_index(level=1, drop=True)
q.name = 'datasets'
q = q[q != 'nan'].value_counts()
plt.title("Most Popular Dataset Platforms")
sns.barplot(y=q.index, x=q)
ff['PersonalProjectsChallengeFreeForm'].value_counts().head(15)
The heuristic score function should always be called from the POV of the player that initiates the search.
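As a minimal sketch of that convention (hypothetical names, not the project's API), the leaf evaluation below always takes the point of view of root_player, the player who initiated the search, even at minimizing plies:
#Hedged sketch: state/score/moves/result/terminal are illustrative stand-ins
def minimax(state, depth, maximizing, root_player, score, moves, result, terminal):
    if depth == 0 or terminal(state):
        return score(state, root_player)  #POV never flips at min plies
    children = (minimax(result(state, m), depth - 1, not maximizing,
                        root_player, score, moves, result, terminal)
                for m in moves(state))
    return max(children) if maximizing else min(children)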
I suspect inaccuracies in the algorithm implementations from the Local Search exercise solutions.
The beam search solution compares the utility of each node against the utility of the problem, which is never updated:
beam = [problem]
for t in range(self.epochs):
    neighborhood = chain(*(n.successors() for n in beam)) # union of all neighbors
    beam = sorted(neighborhood, key=lambda x: x.utility)[-self.beam_width:]
    # !problem is never updated!
    if all([node.utility < problem.utility for node in beam]): break
return beam[-1]
Shouldn't we compare the beam utilities with the best solution from the previous step?
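A possible fix, as a hedged sketch that keeps the same assumed problem/successors/utility interface as the quoted code: track the incumbent best node and compare each new beam against it.
#Hedged sketch: update an incumbent `best` instead of comparing against the fixed `problem`
best = problem
beam = [problem]
for t in range(self.epochs):
    neighborhood = chain(*(n.successors() for n in beam))
    beam = sorted(neighborhood, key=lambda x: x.utility)[-self.beam_width:]
    if all(node.utility < best.utility for node in beam):
        break  #no candidate improves on the best seen so far
    best = beam[-1]  #beam is sorted ascending, so the last node is the best
return best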
The late-acceptance hill climbing solution sets f[v] to the utility of the candidate regardless of whether it was accepted or rejected:
f = [problem.utility] * self.Lfa
for i in range(self.epochs):
    _problem = max(problem.successors(), key=lambda x: x.utility)
    v = i % self.Lfa
    if _problem.utility >= f[v] or _problem.utility >= problem.utility: problem = _problem
    f[v] = _problem.utility # Shouldn't it be problem.utility?
return problem
As far as I understand the pseudocode, f[v] should be set to problem.utility (not _problem.utility!), so that it stays the same when the candidate is rejected.
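A hedged sketch of the proposed fix, under the same assumed interface: write the post-decision utility of the current solution into the history, so a rejected candidate leaves f[v] unchanged.
#Hedged sketch: record the accepted (current) solution's utility in the history
f = [problem.utility] * self.Lfa
for i in range(self.epochs):
    _problem = max(problem.successors(), key=lambda x: x.utility)
    v = i % self.Lfa
    if _problem.utility >= f[v] or _problem.utility >= problem.utility:
        problem = _problem
    f[v] = problem.utility  #unchanged when the candidate was rejected
return problem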
The layers implementation uses dictionaries for parents/children, so membership tests on those attributes check the keys of the layer rather than the values. E.g., foo in literal_layer.children iterates over the literals in the current layer, not over the children.
It may be better to make children and parents private and expose a read-only getter that takes a key, to make the API clearer.
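A minimal standalone illustration of the pitfall (hypothetical literals, not the project's classes): in on a dict tests keys, so the values, i.e. the actual children, are never consulted.
#Hypothetical example: dict membership checks keys, not values
children = {'A': ['B', 'C']}  #literal 'A' in this layer has children 'B' and 'C'
print('B' in children)                                 #False: 'B' is a child, not a key
print(any('B' in kids for kids in children.values()))  #True: the intended test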
There are broken links in the README:
3_Game Playing should be 3_Adversarial Search.
game_agent.py should be my_custom_player.py.
The README instructions for Part 2 say that to test the code one should execute python -m unittest tests.test_my_pddl, but it should be python -m unittest tests.test_my_air_cargo_problems.
This line:
5a146b5#diff-5731b1a6969a827c3a4f61a630b9eb92R30
states that OpenMoveEval, ImprovedEval, and NullEval are defined in game_agent.py, but they currently are in sample_players.py.
The game playing agent test class should not reference "Project 1".
Having created the Miniconda environment from aind-universal-v3.yml, I get the following error when trying to open the AIND-Constraint_Satisfaction.ipynb notebook:
jupyter kernel ImportError: No module named 'win32api'
It looks like the environment file is missing the pywin32 package.
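A possible workaround, assuming a Windows machine and that the environment has already been created and activated, is to install the missing package manually:
conda install pywin32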
When testing the h_maxlevel function, two conditions must be satisfied:
#Import the dataset and keep the relevant columns (4 to 14)
dataset = read.csv('Churn_Modelling.csv')
dataset = dataset[4:14]
#Encode the categorical variables as numeric factors
dataset$Geography = as.numeric(factor(dataset$Geography,
                                      levels = c('France', 'Spain', 'Germany'),
                                      labels = c(1, 2, 3)))
dataset$Gender = as.numeric(factor(dataset$Gender,
                                   levels = c('Female', 'Male'),
                                   labels = c(1, 2)))
#Split the dataset into training and test sets (80/20)
library(caTools)
set.seed(123)
split = sample.split(dataset$Exited, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
#Feature scaling (column 11 is the target, Exited)
training_set[-11] = scale(training_set[-11])
test_set[-11] = scale(test_set[-11])
#Fit a deep neural network with h2o, using all available cores
library(h2o)
h2o.init(nthreads = -1)
model = h2o.deeplearning(y = 'Exited',
                         training_frame = as.h2o(training_set),
                         activation = 'Rectifier',
                         hidden = c(5,5),
                         epochs = 100,
                         train_samples_per_iteration = -2)
#Predict on the test set and threshold the scores at 0.5
y_pred = h2o.predict(model, newdata = as.h2o(test_set[-11]))
y_pred = (y_pred > 0.5)
y_pred = as.vector(y_pred)
#Confusion matrix of actual vs. predicted churn
cm = table(test_set[, 11], y_pred)
setwd("C:/Users/Jose/Desktop")
#Read the tab-separated dataset
data <- read.csv("dataCS.txt", head=TRUE, sep="\t")
#Random 75/25 train-test split
index <- sample(1:nrow(data),round(0.75*nrow(data)))
train <- data[index,]
test <- data[-index,]
#Fit a linear model as a baseline and compute its test MSE
lm.fit <- glm(Irri~., data=train)
summary(lm.fit)
pr.lm <- predict(lm.fit,test)
MSE.lm <- sum((pr.lm - test$Irri)^2)/nrow(test)
#Min-max scale every column to [0,1] before training the neural net
maxs <- apply(data, 2, max)
mins <- apply(data, 2, min)
scaled <- as.data.frame(scale(data, center = mins, scale = maxs - mins))
train_ <- scaled[index,]
test_ <- scaled[-index,]
library(neuralnet)
#Build the formula Irri ~ <all other columns> (neuralnet does not accept y ~ .)
n <- names(train_)
f <- as.formula(paste("Irri ~", paste(n[!n %in% "Irri"], collapse = " + ")))
nn <- neuralnet(f,data=train_,hidden=c(5,3),linear.output=F)
plot(nn)
#Predict on the scaled test predictors, then undo the min-max scaling
pr.nn <- compute(nn,test_[,1:4])
pr.nn_ <- pr.nn$net.result*(max(data$Irri)-min(data$Irri))+min(data$Irri)
test.r <- (test_$Irri)*(max(data$Irri)-min(data$Irri))+min(data$Irri)
MSE.nn <- sum((test.r - pr.nn_)^2)/nrow(test_)
print(paste(MSE.lm,MSE.nn))
par(mfrow=c(1,2))
#Comparison: NN predictions vs. actual
plot(test$Irri,pr.nn_,col='red',main='Real vs predicted NN',pch=18,cex=0.7)
abline(0,1,lwd=2)
legend('bottomright',legend='NN',pch=18,col='red', bty='n')
#Comparison: linear model predictions vs. actual
plot(test$Irri,pr.lm,col='blue',main='Real vs predicted linear model',pch=18, cex=0.7)
abline(0,1,lwd=2)
legend('bottomright',legend='linear model',pch=18,col='blue', bty='n', cex=.95)
#Comparison: both models
plot(test$Irri,pr.nn_,col='red',main='Real vs predicted NN',pch=18,cex=0.7)
points(test$Irri,pr.lm,col='blue',pch=18,cex=0.7)
abline(0,1,lwd=2)
legend('bottomright',legend=c('NN','LM'),pch=18,col=c('red','blue'))
library(boot)
set.seed(200)
#10-fold cross-validated MSE for the linear model
lm.fit <- glm(Irri~.,data=data)
cv.glm(data,lm.fit,K=10)$delta[1]
#Cross-validate the neural net: 10 rounds of random 90/10 subsampling
set.seed(450)
cv.error <- NULL
k <- 10
library(plyr)
pbar <- create_progress_bar('text')
pbar$init(k)
for(i in 1:k){
    index <- sample(1:nrow(data),round(0.9*nrow(data)))
    train.cv <- scaled[index,]
    test.cv <- scaled[-index,]
    nn <- neuralnet(f,data=train.cv,hidden=c(5,3),linear.output=F)
    pr.nn <- compute(nn,test.cv[,1:4])
    #Undo the min-max scaling before computing the MSE
    pr.nn <- pr.nn$net.result*(max(data$Irri)-min(data$Irri))+min(data$Irri)
    test.cv.r <- (test.cv$Irri)*(max(data$Irri)-min(data$Irri))+min(data$Irri)
    cv.error[i] <- sum((test.cv.r - pr.nn)^2)/nrow(test.cv)
    pbar$step()
}
mean(cv.error)
boxplot(cv.error,xlab='MSE CV',col='cyan',
border='blue',names='CV error (MSE)',
main='CV error (MSE) for NN',horizontal=TRUE)
#Repeat the experiment with a smaller net (hidden = c(5,2)) and a linear output unit
library(plyr)
pbar <- create_progress_bar('text')
pbar$init(k)
for(i in 1:k){
    index <- sample(1:nrow(data),round(0.9*nrow(data)))
    train.cv <- scaled[index,]
    test.cv <- scaled[-index,]
    nn <- neuralnet(f,data=train.cv,hidden=c(5,2),linear.output=T)
    pr.nn <- compute(nn,test.cv[,1:4])
    pr.nn <- pr.nn$net.result*(max(data$Irri)-min(data$Irri))+min(data$Irri)
    test.cv.r <- (test.cv$Irri)*(max(data$Irri)-min(data$Irri))+min(data$Irri)
    cv.error[i] <- sum((test.cv.r - pr.nn)^2)/nrow(test.cv)
    pbar$step()
}
mean(cv.error)
boxplot(cv.error,xlab='MSE CV',col='cyan',
border='blue',names='CV error (MSE)',
main='CV error (MSE) for NN',horizontal=TRUE)