The objective is to identify and recommend to the client the New York City neighbourhood that is the best choice for starting a restaurant.
Data sources: https://en.wikipedia.org/wiki/New_York_City ; https://en.wikipedia.org/wiki/Demographics_of_New_York_City. Web scraping techniques were used to extract NYC population density and demographics data from Wikipedia.
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import requests # HTTP library for fetching the Wikipedia pages
from bs4 import BeautifulSoup # HTML parser used for web scraping
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
#!conda install -c conda-forge folium
import os
import folium # map rendering library
# Matplotlib and associated plotting modules
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mp
import re
import csv
%matplotlib inline
print('Libraries imported.')
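geopy and folium are imported here but only used later in the analysis. As a minimal sketch of how they fit together (the user_agent string below is an arbitrary identifier of our own choosing, and we assume the Nominatim service resolves the query), the city can be geocoded once and a map centred on it:
geolocator = Nominatim(user_agent='nyc_restaurant_analysis') # user_agent is an arbitrary identifier we supply
location = geolocator.geocode('New York City, NY') # returns a Location with .latitude / .longitude
nyc_map = folium.Map(location=[location.latitude, location.longitude], zoom_start=10) # centre the map on NYC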
response_obj = requests.get('https://en.wikipedia.org/wiki/New_York_City').text
soup = BeautifulSoup(response_obj,'lxml')
Neighborhoods_NYC_Table = soup.find('table', {'class':'wikitable sortable'}) # the boroughs table is the first 'wikitable sortable' on the page
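Wikipedia page layouts change over time, so the scrape can silently break. A small guard (an addition, not in the original notebook) makes such a change fail loudly here rather than as an opaque AttributeError further down:
if Neighborhoods_NYC_Table is None: # fail fast if the expected table is missing
    raise ValueError('Borough table not found; the Wikipedia page layout may have changed')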
### Preparation of the table
rows = Neighborhoods_NYC_Table.select("tbody > tr")[3:8] # the five borough rows
boroughs = []
for row in rows:
    borough = {}
    tds = row.select('td')
    borough["borough"] = tds[0].text.strip()
    borough["county"] = tds[1].text.strip()
    borough["population"] = float(tds[2].text.strip().replace(",", ""))
    borough["gdp_billions"] = float(tds[3].text.strip().replace(",", ""))
    borough["gdp_per_capita"] = float(tds[4].text.strip().replace(",", ""))
    borough["land_sq_mi"] = float(tds[5].text.strip().replace(",", "")) # land area in square miles
    borough["land_sq_km"] = float(tds[6].text.strip().replace(",", "")) # land area in square kilometres
    borough["persons_per_sq_mi"] = float(tds[7].text.strip().replace(",", "")) # population density
    borough["persons_per_sq_km"] = float(tds[8].text.strip().replace(",", ""))
    boroughs.append(borough)
print(boroughs)
df_boroughs = pd.DataFrame(boroughs, columns=["borough", "county", "population", "gdp_per_capita", "persons_per_sq_km"])
df_boroughs.head()
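Population density is one rough proxy for restaurant foot traffic, so a quick, tentative ranking of the boroughs by density is a useful first look at the data:
# Rank boroughs by population density (persons per square kilometre), densest first.
df_boroughs.sort_values('persons_per_sq_km', ascending=False)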
response_obj = requests.get('https://en.wikipedia.org/wiki/Demographics_of_New_York_City').text
soup = BeautifulSoup(response_obj,'lxml')
Population_Census_Table = soup.select_one('.wikitable:nth-of-type(5)') # use a CSS selector to target the census demographics table
jurisdictions = []
rows = Population_Census_Table.select("tbody > tr")[3:8] # the five borough rows
for row in rows:
    jurisdiction = {}
    tds = row.select('td')
    jurisdiction["jurisdiction"] = tds[0].text.strip()
    jurisdiction["population_census"] = int(tds[1].text.strip().replace(",", ""))
    jurisdiction["%_white"] = float(tds[2].text.strip().replace(",", ""))
    jurisdiction["%_black_or_african_american"] = float(tds[3].text.strip().replace(",", ""))
    jurisdiction["%_asian"] = float(tds[4].text.strip().replace(",", ""))
    jurisdiction["%_other"] = float(tds[5].text.strip().replace(",", ""))
    jurisdiction["%_mixed_race"] = float(tds[6].text.strip().replace(",", ""))
    jurisdiction["%_hispanic_latino_of_any_race"] = float(tds[7].text.strip().replace(",", ""))
    jurisdiction["%_catholic"] = float(tds[10].text.strip().replace(",", ""))
    jurisdiction["%_jewish"] = float(tds[12].text.strip().replace(",", ""))
    jurisdictions.append(jurisdiction)
print(jurisdictions)
df_demographics = pd.DataFrame(jurisdictions, columns=["jurisdiction", "%_white", "%_black_or_african_american", "%_asian", "%_other", "%_mixed_race", "%_hispanic_latino_of_any_race"])
df_demographics.head()
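The two tables describe the same five boroughs, so they can be joined for the analysis that follows. A minimal sketch, assuming the scraped names in df_boroughs['borough'] match those in df_demographics['jurisdiction'] exactly (in practice the strings may need cleaning, e.g. stripping footnote markers):
# Join density and demographic profiles on the borough name.
df_combined = df_boroughs.merge(df_demographics, left_on='borough', right_on='jurisdiction')
df_combined.head()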
Preliminary findings indicate that