Problem Statement

The objective is to locate and recommend to the client which neighbourhood in New York City will be best choice to start a restaurant.

New York city population and demographic data

Data source : https://en.wikipedia.org/wiki/New_York_City ; https://en.wikipedia.org/wiki/Demographics_of_New_York_City. Web scraping techniques was used to get NYC population density and demographics data from Wikipedia.

In [1]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
#!conda install -c conda-forge folium

import os
import folium # map rendering library
# Matplotlib and associated plotting modules

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mp
import re
import csv
%matplotlib inline


print('Libraries imported.')
Libraries imported.
In [2]:
response_obj = requests.get('https://en.wikipedia.org/wiki/New_York_City').text
soup = BeautifulSoup(response_obj,'lxml')
Neighborhoods_NYC_Table = soup.find('table', {'class':'wikitable sortable'})
### preparation of the table 
In [3]:
rows = Neighborhoods_NYC_Table.select("tbody > tr")[3:8]

boroughs = []
for row in rows:
    borough = {}
    tds = row.select('td')
    borough["borough"] = tds[0].text.strip()
    borough["county"] = tds[1].text.strip()
    borough["population"] = float(tds[2].text.strip().replace(",",""))
    borough["gdp_billions"] = float(tds[3].text.strip().replace(",",""))
    borough["gdp_per_capita"] = float(tds[4].text.strip().replace(",",""))
    borough["land_sqm"] = float(tds[5].text.strip().replace(",",""))
    borough["land_sqkm"] = float(tds[6].text.strip().replace(",",""))
    borough["persons_sqm"] = float(tds[7].text.strip().replace(",",""))
    borough["persons_sqkm"] = float(tds[8].text.strip().replace(",",""))
    
    boroughs.append(borough)

print(boroughs)
[{'borough': 'The Bronx', 'county': 'Bronx', 'population': 1471160.0, 'gdp_billions': 28.787, 'gdp_per_capita': 19570.0, 'land_sqm': 42.1, 'land_sqkm': 109.04, 'persons_sqm': 34653.0, 'persons_sqkm': 13231.0}, {'borough': 'Brooklyn', 'county': 'Kings', 'population': 2648771.0, 'gdp_billions': 63.303, 'gdp_per_capita': 23900.0, 'land_sqm': 70.82, 'land_sqkm': 183.42, 'persons_sqm': 37137.0, 'persons_sqkm': 14649.0}, {'borough': 'Manhattan', 'county': 'New York', 'population': 1664727.0, 'gdp_billions': 629.682, 'gdp_per_capita': 378250.0, 'land_sqm': 22.83, 'land_sqkm': 59.13, 'persons_sqm': 72033.0, 'persons_sqkm': 27826.0}, {'borough': 'Queens', 'county': 'Queens', 'population': 2358582.0, 'gdp_billions': 73.842, 'gdp_per_capita': 31310.0, 'land_sqm': 108.53, 'land_sqkm': 281.09, 'persons_sqm': 21460.0, 'persons_sqkm': 8354.0}, {'borough': 'Staten Island', 'county': 'Richmond', 'population': 479458.0, 'gdp_billions': 11.249, 'gdp_per_capita': 23460.0, 'land_sqm': 58.37, 'land_sqkm': 151.18, 'persons_sqm': 8112.0, 'persons_sqkm': 3132.0}]

df = pd.DataFrame(boroughs, columns=["borough","county", "population", "gdp_per_capita", "persons_sqkm"]) df.head()

In [5]:
response_obj = requests.get('https://en.wikipedia.org/wiki/Demographics_of_New_York_City').text
soup = BeautifulSoup(response_obj,'lxml')
Population_Census_Table = soup.select_one('.wikitable:nth-of-type(5)') #use css selector to target correct table.

jurisdictions = []
rows = Population_Census_Table.select("tbody > tr")[3:8]
for row in rows:
    jurisdiction = {}
    tds = row.select('td')
    jurisdiction["jurisdiction"] = tds[0].text.strip()
    jurisdiction["population_census"] = tds[1].text.strip()
    jurisdiction["%_white"] = float(tds[2].text.strip().replace(",",""))
    jurisdiction["%_black_or_african_amercian"] = float(tds[3].text.strip().replace(",",""))
    jurisdiction["%_Asian"] = float(tds[4].text.strip().replace(",",""))
    jurisdiction["%_other"] = float(tds[5].text.strip().replace(",",""))
    jurisdiction["%_mixed_race"] = float(tds[6].text.strip().replace(",",""))
    jurisdiction["%_hispanic_latino_of_other_race"] = float(tds[7].text.strip().replace(",",""))
    jurisdiction["%_catholic"] = float(tds[10].text.strip().replace(",",""))
    jurisdiction["%_jewish"] = float(tds[12].text.strip().replace(",",""))
    jurisdictions.append(jurisdiction)

print(jurisdictions)
[{'jurisdiction': 'Queens', 'population_census': '2,229,379', '%_white': 44.1, '%_black_or_african_amercian': 20.0, '%_Asian': 17.6, '%_other': 12.3, '%_mixed_race': 6.1, '%_hispanic_latino_of_other_race': 25.0, '%_catholic': 37.0, '%_jewish': 5.0}, {'jurisdiction': 'Manhattan', 'population_census': '1,537,195', '%_white': 54.4, '%_black_or_african_amercian': 17.4, '%_Asian': 9.4, '%_other': 14.7, '%_mixed_race': 4.1, '%_hispanic_latino_of_other_race': 27.2, '%_catholic': 11.0, '%_jewish': 9.0}, {'jurisdiction': 'Bronx', 'population_census': '1,332,650', '%_white': 29.9, '%_black_or_african_amercian': 35.6, '%_Asian': 3.0, '%_other': 25.7, '%_mixed_race': 5.8, '%_hispanic_latino_of_other_race': 48.4, '%_catholic': 14.0, '%_jewish': 5.0}, {'jurisdiction': 'Staten Island', 'population_census': '443,728', '%_white': 77.6, '%_black_or_african_amercian': 9.7, '%_Asian': 5.7, '%_other': 4.3, '%_mixed_race': 2.7, '%_hispanic_latino_of_other_race': 12.1, '%_catholic': 11.0, '%_jewish': 5.0}, {'jurisdiction': 'NYC Total', 'population_census': '8,008,278', '%_white': 44.7, '%_black_or_african_amercian': 26.6, '%_Asian': 9.8, '%_other': 14.0, '%_mixed_race': 4.9, '%_hispanic_latino_of_other_race': 27.0, '%_catholic': 17.0, '%_jewish': 6.0}]
In [6]:
df = pd.DataFrame(jurisdictions, columns=["jurisdiction","%_white", "%_black_or_african_amercian", "%_Asian", "%_other", "%_mixed_race", "%_hispanic_latino_of_other_race"])
df.head()
Out[6]:
jurisdiction %_white %_black_or_african_amercian %_Asian %_other %_mixed_race %_hispanic_latino_of_other_race
0 Queens 44.1 20.0 17.6 12.3 6.1 25.0
1 Manhattan 54.4 17.4 9.4 14.7 4.1 27.2
2 Bronx 29.9 35.6 3.0 25.7 5.8 48.4
3 Staten Island 77.6 9.7 5.7 4.3 2.7 12.1
4 NYC Total 44.7 26.6 9.8 14.0 4.9 27.0

Preliminary finding indicates that

  1. Queens is the second most populous urban area in New York City (NYC), behind Brooklyn however, it is the most ethnically diverse urban area in NYC with the highest Asian ethnic minority population.
  2. Despite the fact that Mantthan is the third most populous urban area in New York City (NYC), it has a population density of 27,826 people per square km, highest of any borough in the United States. It has the second highest Asian ethnic minority population in NYC.
In [ ]: