Week 3B: Geospatial Data Analysis and GeoPandas¶

Sep 21, 2022

Housekeeping¶

  • Homework #2 due on 10/3
  • Choose a dataset to visualize and explore
    • OpenDataPhilly or one of your choosing
    • Email me if you want to analyze one that's not on OpenDataPhilly

Agenda for Week #3¶

Last lecture

  • Vector data and introduction to GeoPandas
  • Spatial relationships

Today

  • Spatial joins
  • Visualization for geospatial data
  • Demo: 311 requests by neighborhood in Philadelphia (continued)
  • Exercise: Property assessments by neighborhood
In [1]:
# Let's set up the imports we'll need first
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import geopandas as gpd

Part 1: Picking up where we left off with spatial operations¶

In [2]:
# Countries and cities shapefiles
countries = gpd.read_file("./data/ne_110m_admin_0_countries")
cities = gpd.read_file("./data/ne_110m_populated_places")
In [3]:
countries.head()
Out[3]:
iso_a3 name continent pop_est gdp_md_est geometry
0 AFG Afghanistan Asia 34124811.0 64080.0 POLYGON ((61.21082 35.65007, 62.23065 35.27066...
1 AGO Angola Africa 29310273.0 189000.0 MULTIPOLYGON (((23.90415 -11.72228, 24.07991 -...
2 ALB Albania Europe 3047987.0 33900.0 POLYGON ((21.02004 40.84273, 20.99999 40.58000...
3 ARE United Arab Emirates Asia 6072475.0 667200.0 POLYGON ((51.57952 24.24550, 51.75744 24.29407...
4 ARG Argentina South America 44293293.0 879400.0 MULTIPOLYGON (((-66.95992 -54.89681, -67.56244...
In [5]:
cities.head()
Out[5]:
name geometry
0 Vatican City POINT (12.45339 41.90328)
1 San Marino POINT (12.44177 43.93610)
2 Vaduz POINT (9.51667 47.13372)
3 Lobamba POINT (31.20000 -26.46667)
4 Luxembourg POINT (6.13000 49.61166)

1. The spatial join¶

SPATIAL JOIN = merging attributes from two geometry layers based on their spatial relationship

The different parts of this operation:

  • The GeoDataFrame to which we want to add information
  • The GeoDataFrame that contains the information we want to add
  • The spatial relationship we want to use to match both datasets (intersects, contains, within)
  • The type of join: left or inner join

In this case, we want to join the cities dataframe, containing Point geometries, with the information of the countries dataframe, containing Polygon geometries.

To match cities with countries, we'll use the within spatial relationship.

The geopandas.sjoin() function performs this operation:

In [6]:
joined = gpd.sjoin(cities, countries, predicate='within', how='left') 
In [7]:
joined.head() 
Out[7]:
name_left geometry index_right iso_a3 name_right continent pop_est gdp_md_est
0 Vatican City POINT (12.45339 41.90328) 79.0 ITA Italy Europe 62137802.0 2221000.0
1 San Marino POINT (12.44177 43.93610) 79.0 ITA Italy Europe 62137802.0 2221000.0
2 Vaduz POINT (9.51667 47.13372) 9.0 AUT Austria Europe 8754413.0 416600.0
3 Lobamba POINT (31.20000 -26.46667) 152.0 SWZ Swaziland Africa 1467152.0 11060.0
4 Luxembourg POINT (6.13000 49.61166) 97.0 LUX Luxembourg Europe 594130.0 58740.0
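
Note: because we used how='left', every city is kept, even those that don't fall within any country polygon; their country attributes are simply NaN. A quick sketch to count them:

# Cities whose Point doesn't fall within any country polygon
unmatched = joined[joined["index_right"].isna()]
print(len(unmatched), "cities did not match a country")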
In [8]:
cities_in_italy = joined.loc[joined['name_right'] == 'Italy']
cities_in_italy
Out[8]:
name_left geometry index_right iso_a3 name_right continent pop_est gdp_md_est
0 Vatican City POINT (12.45339 41.90328) 79.0 ITA Italy Europe 62137802.0 2221000.0
1 San Marino POINT (12.44177 43.93610) 79.0 ITA Italy Europe 62137802.0 2221000.0
226 Rome POINT (12.48131 41.89790) 79.0 ITA Italy Europe 62137802.0 2221000.0
In [9]:
# Extract Italy
italy = countries.loc[countries["name"] == "Italy"]

# Plot
fig, ax = plt.subplots(figsize=(8, 8))
italy.plot(ax=ax, facecolor="none", edgecolor="black")
ax.set_axis_off()
ax.set_aspect("equal")

# Plot the cities in Italy from the joined data frame
# Use the same axes by passing in the ax=ax keyword
ax = cities_in_italy.plot(ax=ax, color="red")

2. Spatial overlay operation¶

We can also perform the join operation on the geometries themselves, rather than just combining attributes.

The overlay() function combines geometries, e.g. by taking the intersection of the geometries.

In [10]:
africa = countries.loc[countries['continent'] == 'Africa'] 
In [11]:
africa.head()
Out[11]:
iso_a3 name continent pop_est gdp_md_est geometry
1 AGO Angola Africa 29310273.0 189000.0 MULTIPOLYGON (((23.90415 -11.72228, 24.07991 -...
11 BDI Burundi Africa 11466756.0 7892.0 POLYGON ((29.34000 -4.49998, 29.27638 -3.29391...
13 BEN Benin Africa 11038805.0 24310.0 POLYGON ((2.69170 6.25882, 1.86524 6.14216, 1....
14 BFA Burkina Faso Africa 20107509.0 32990.0 POLYGON ((2.15447 11.94015, 1.93599 11.64115, ...
25 BWA Botswana Africa 2214858.0 35900.0 POLYGON ((29.43219 -22.09131, 28.01724 -22.827...
In [12]:
## What crs?
africa.crs
Out[12]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [13]:
# Let's transform to a CRS that uses meters 
# instead of degrees (EPSG=3857)
africa = africa.to_crs(epsg=3857)

africa.crs
Out[13]:
<Derived Projected CRS: EPSG:3857>
Name: WGS 84 / Pseudo-Mercator
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: World between 85.06°S and 85.06°N.
- bounds: (-180.0, -85.06, 180.0, 85.06)
Coordinate Operation:
- name: Popular Visualisation Pseudo-Mercator
- method: Popular Visualisation Pseudo Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [14]:
fig, ax = plt.subplots(figsize=(8,8))

africa.plot(ax=ax, facecolor='#b9f2b1')

ax.set_axis_off()
ax.set_aspect("equal") 
In [15]:
# Important: the CRSs need to match!
cities = cities.to_crs(epsg=3857)
In [16]:
# Create a copy of the GeoDataFrame
buffered_cities = cities.copy()

# Add a buffer region of 250 km around all cities
buffered_cities['geometry'] = buffered_cities.buffer(250e3)
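
Note that the buffer distance is interpreted in the units of the current CRS (meters for EPSG:3857), which is why we reprojected first. A quick sanity check that the buffered Points are now Polygons:

# Each buffered geometry should now be a polygon of roughly 250 km radius
print(buffered_cities.geometry.iloc[0].geom_type)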

Plot the difference of the two geometries¶

In [19]:
fig, ax = plt.subplots(figsize=(8, 8))

# Calculate the difference of the geometry sets
diff = gpd.overlay(africa, buffered_cities, how='difference')

# Plot
diff.plot(facecolor="#b9f2b1", ax=ax)
ax.set_axis_off()
ax.set_aspect("equal")
In [22]:
# Data attributes are the same as the first data frame (africa) 
# with an updated geometry column
diff.head()
Out[22]:
iso_a3 name continent pop_est gdp_md_est geometry
0 AGO Angola Africa 29310273.0 189000.0 MULTIPOLYGON (((2673464.087 -1449571.330, 2441...
1 BEN Benin Africa 11038805.0 24310.0 POLYGON ((100138.898 1231805.081, 138422.412 1...
2 BFA Burkina Faso Africa 20107509.0 32990.0 MULTIPOLYGON (((100138.898 1231805.081, 26368....
3 BWA Botswana Africa 2214858.0 35900.0 POLYGON ((3065120.801 -2659823.621, 3061281.52...
4 CAF Central African Rep. Africa 5625118.0 3206.0 POLYGON ((1792937.514 836963.765, 1813457.017 ...

Plot the intersection of the two geometries¶

In [23]:
fig, ax = plt.subplots(figsize=(8, 8))

# The intersection of the geometry sets
intersection = gpd.overlay(africa, buffered_cities, how='intersection')

# Plot
intersection.plot(ax=ax, facecolor="#b9f2b1")
ax.set_axis_off()
ax.set_aspect("equal")

Recap: spatial operations¶

  • Spatial join: merge attributes from one data frame to another based on the spatial relationship
  • Spatial overlay: creating new geometries based on spatial operation between both data frames (and not combining attributes of both data frames)

Part 2: Putting it all together: 311 requests in 2020¶

Load 311 requests in Philadelphia from the data/ directory.

Source: OpenDataPhilly

In [34]:
# Load the data from a CSV file into a pandas DataFrame
requests = pd.read_csv('./data/public_cases_fc_2020.csv.tar.gz')
/var/folders/49/ntrr94q12xd4rq8hqdnx96gm0000gn/T/ipykernel_40811/38353759.py:2: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.
  requests = pd.read_csv('./data/public_cases_fc_2020.csv.tar.gz')
In [35]:
print("number of requests = ", len(requests))
number of requests =  727746
In [36]:
requests.head()
Out[36]:
objectid service_request_id status status_notes service_name service_code agency_responsible service_notice requested_datetime updated_datetime expected_datetime address zipcode media_url lat lon
0 7890359 13127945 Closed Question Answered Information Request SR-IR01 Police Department NaN 2020-02-05 11:00:11 2020-02-05 11:00:23 2020-02-05 11:30:20 NaN NaN NaN NaN NaN
1 9551814 13788036 Closed Question Answered Information Request SR-IR01 State/Federal Government Offices NaN 2020-11-24 11:34:27 2020-11-24 11:34:32 2020-11-24 12:00:24 NaN NaN NaN NaN NaN
2 8138665 13251716 Closed NaN Information Request SR-IR01 License & Inspections NaN 2020-03-26 11:40:36 2020-03-26 11:40:47 2020-03-26 12:00:21 NaN NaN NaN NaN NaN
3 8433329 13376073 Closed NaN Information Request SR-IR01 License & Inspections NaN 2020-05-22 12:00:10 2020-05-22 12:00:39 2020-05-22 12:30:22 NaN NaN NaN NaN NaN
4 8433331 13376078 Closed NaN Information Request SR-IR01 Streets Department NaN 2020-05-22 12:00:44 2020-05-22 12:00:53 2020-05-22 12:30:22 NaN NaN NaN NaN NaN

First, convert to a GeoDataFrame¶

Remove the requests missing lat/lon coordinates

In [37]:
requests = requests.dropna(subset=['lat', 'lon']) 

Create Point objects for each lat and lon combination.

We can use the helper function geopandas.points_from_xy()

In [38]:
requests['geometry'] = gpd.points_from_xy(requests['lon'], requests['lat'])
In [39]:
requests['geometry'].head()
Out[39]:
13    POINT (-75.16257 40.04816)
18    POINT (-75.18500 40.03733)
21    POINT (-75.20887 40.02665)
24    POINT (-75.10652 40.03439)
25    POINT (-75.16350 39.93616)
Name: geometry, dtype: geometry

Now, convert to a GeoDataFrame.

Important

  • Don't forget to set the CRS manually!
  • The CRS you specify when creating a GeoDataFrame tells geopandas what coordinate system the input data is in.
  • Usually you will be reading lat/lng coordinates and will need to specify the CRS as EPSG code 4326.
  • You should specify the CRS as a string using the syntax: EPSG:4326

Since we're only using a few EPSG codes in this course, you can usually tell what the CRS is by looking at the values in the Point() objects.

Philadelphia has a latitude of about 40 deg and longitude of about -75 deg.

Our data must be in the usual lat/lng EPSG=4326.


In [40]:
requests = gpd.GeoDataFrame(requests, 
                            geometry="geometry", 
                            crs="EPSG:4326")

Next, identify the top 20 most common requests¶

Group by the service name and calculate the size of each group:

In [41]:
service_types = requests.groupby('service_name').size()

Sort by the number (in descending order):

In [42]:
service_types = service_types.sort_values(ascending=False)

Slice the data to take the first 20 elements:

In [43]:
top20 = service_types.iloc[:20]
top20  
Out[43]:
service_name
Rubbish/Recyclable Material Collection    47690
Illegal Dumping                           26252
Maintenance Complaint                     24722
Abandoned Vehicle                         24538
Information Request                       18480
Graffiti Removal                          16264
Street Light Outage                       13707
Street Defect                              9670
Agency Receivables                         6527
Other (Streets)                            5994
Construction Complaints                    5918
Sanitation / Dumpster Violation            5563
Fire Safety Complaint                      5276
Street Trees                               5141
LI Escalation                              4788
Maintenance Residential or Commercial      3389
Traffic Signal Emergency                   3013
Parks and Rec Safety and Maintenance       2773
License Complaint                          2593
Alley Light Outage                         2340
dtype: int64

Let's trim to only the trash-related requests¶

In [44]:
trash_selection =  requests["service_name"] == "Rubbish/Recyclable Material Collection"
trash_requests = requests.loc[trash_selection].copy()

print("The nuumber of trash-related requests = ", len(trash_requests))
The nuumber of trash-related requests =  47690

Trash collection was a big concern in Philadelphia when the pandemic began¶

See, for example, this article in the Philadelphia Inquirer.

Let's plot the monthly totals for 2020¶

In [45]:
# Convert the requested datetime to a column of Datetime objects
trash_requests['requested_datetime'] = pd.to_datetime(trash_requests['requested_datetime'])

# Use the .dt attribute to extract out the month name
trash_requests['month_int'] = trash_requests['requested_datetime'].dt.month
trash_requests['month'] = trash_requests['requested_datetime'].dt.month_name()

Note: Setting with a copy warning¶

TL;DR: This is usually fine!

If you select a subset of a dataframe (a "slice") and then make changes (like adding a new column), you will get this warning. There is a good discussion of the issue on StackOverflow.

You can usually make this go away if you add a .copy() after you perform your selection. For example, this warning will go away if we had done:

trash_requests = requests.loc[requests["service_name"] == "Rubbish/Recyclable Material Collection"].copy()
In [46]:
totals_by_month = trash_requests.groupby(["month_int", "month"], as_index=False).size()

totals_by_month
Out[46]:
month_int month size
0 1 January 2710
1 2 February 2067
2 3 March 2460
3 4 April 5778
4 5 May 6572
5 6 June 6485
6 7 July 9627
7 8 August 4466
8 9 September 1704
9 10 October 1335
10 11 November 1729
11 12 December 2757

Note: I've used the as_index=False syntax here¶

This will force the size() function to return a DataFrame instead of having the grouping columns as the index of the resulting groupby operation.

It saves us from having to do the .reset_index() function call after running the .size() function.
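
For reference, a minimal equivalent using .reset_index() would look like this:

# Same result: .size() returns a Series, and reset_index() turns it into a DataFrame
totals_by_month = (
    trash_requests.groupby(["month_int", "month"])
    .size()
    .reset_index(name="size")
)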

Plot a bar chart with seaborn¶

For making static bar charts with Python, seaborn's sns.barplot() is the best option

In [47]:
# Initialize figure/axes
fig, ax = plt.subplots(figsize=(12, 6))

# Plot!
sns.barplot(
    x="month_int",
    y="size",
    data=totals_by_month,
    color="#2176d2",
    ax=ax,
)

ax.set_xticklabels(totals_by_month["month"]);

Example: Improving the aesthetics of matplotlib¶

The trend is clear in the previous chart, but can we do a better job with the aesthetics? Yes!

For reference, here is a common way to clean up charts in matplotlib:

In [48]:
# Initialize figure/axes
fig, ax = plt.subplots(figsize=(12, 6))

# Plot!
sns.barplot(
    x="month",
    y="size",
    data=totals_by_month,
    color="#2176d2",
    ax=ax,
    order=[
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ],
    zorder=999,  # Make sure the bar charts are on top of the grid
)

# Remove x/y axis labels
ax.set_xlabel("")
ax.set_ylabel("")

# Format the ytick labels to use a comma and no decimal places
ax.set_yticklabels([f"{yval:,.0f}" for yval in ax.get_yticks()])

# Rotate the xticks
plt.setp(ax.get_xticklabels(), rotation=60)

# Add a grid background
ax.grid(True, axis="y")

# Remove the top and right axes lines
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Add a title
ax.set_title(
    "Philadelphia's Trash-Related 311 Requests in 2020", weight="bold", fontsize=16
);
/var/folders/49/ntrr94q12xd4rq8hqdnx96gm0000gn/T/ipykernel_40811/2905335741.py:33: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels([f"{yval:,.0f}" for yval in ax.get_yticks()])
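
Aside: this warning appears because we set the tick labels without pinning the tick locations. One way to avoid it is to use a matplotlib formatter instead (a sketch):

from matplotlib.ticker import StrMethodFormatter

# Format the y ticks with commas, without hard-coding the label text
ax.yaxis.set_major_formatter(StrMethodFormatter("{x:,.0f}"))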

Now let's look at some geospatial trends!¶

Let's convert from lat/lng to Web Mercator¶

The original data has EPSG=4326. We'll convert to EPSG=3857.

In [49]:
trash_requests = trash_requests.to_crs(epsg=3857)
In [50]:
trash_requests.head()
Out[50]:
objectid service_request_id status status_notes service_name service_code agency_responsible service_notice requested_datetime updated_datetime expected_datetime address zipcode media_url lat lon geometry month_int month
24 8180042 13269656 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-04-02 19:22:24 2020-04-06 07:02:57 2020-04-06 20:00:00 624 FOULKROD ST NaN NaN 40.034389 -75.106518 POINT (-8360819.322 4870940.907) 4 April
25 8180043 13266979 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-04-02 08:40:53 2020-04-06 07:02:58 2020-04-05 20:00:00 1203 ELLSWORTH ST NaN NaN 39.936164 -75.163497 POINT (-8367162.212 4856670.199) 4 April
57 7744426 13066443 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-01-02 19:17:55 2020-01-04 05:46:06 2020-01-06 19:00:00 9054 WESLEYAN RD NaN NaN 40.058737 -75.018345 POINT (-8351004.015 4874481.442) 1 January
58 7744427 13066540 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-01-03 07:01:46 2020-01-04 05:46:07 2020-01-06 19:00:00 2784 WILLITS RD NaN NaN 40.063658 -75.022347 POINT (-8351449.489 4875197.202) 1 January
175 7801094 13089345 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-01-15 13:22:14 2020-01-16 14:03:29 2020-01-16 19:00:00 6137 LOCUST ST NaN NaN 39.958186 -75.244732 POINT (-8376205.240 4859867.796) 1 January

Calculate statistics by Zillow neighborhood¶

A GeoJSON holding Zillow definitions for Philadelphia neighborhoods is available in the data/ directory.

In [51]:
zillow = gpd.read_file('data/zillow_neighborhoods.geojson')
zillow = zillow.to_crs(epsg=3857)
In [52]:
zillow.head()
Out[52]:
ZillowName geometry
0 Academy Gardens POLYGON ((-8348795.677 4875297.327, -8348355.9...
1 Airport POLYGON ((-8370923.380 4850336.405, -8370799.2...
2 Allegheny West POLYGON ((-8367432.106 4866417.820, -8367436.0...
3 Andorra POLYGON ((-8373967.120 4875663.024, -8374106.1...
4 Aston Woodbridge POLYGON ((-8349918.770 4873746.906, -8349919.8...
In [53]:
fig, ax = plt.subplots(figsize=(8, 8))
ax = zillow.plot(ax=ax, facecolor='none', edgecolor='black')
ax.set_axis_off()
ax.set_aspect("equal")

Goal: for each neighborhood, calculate the number of trash-related requests¶

Use the sjoin() function to match point data (requests) to polygon data (neighborhoods)

In [54]:
joined = gpd.sjoin(trash_requests, zillow, predicate='within', how='left')
In [55]:
joined.head()
Out[55]:
objectid service_request_id status status_notes service_name service_code agency_responsible service_notice requested_datetime updated_datetime ... address zipcode media_url lat lon geometry month_int month index_right ZillowName
24 8180042 13269656 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-04-02 19:22:24 2020-04-06 07:02:57 ... 624 FOULKROD ST NaN NaN 40.034389 -75.106518 POINT (-8360819.322 4870940.907) 4 April 70.0 Lawndale
25 8180043 13266979 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-04-02 08:40:53 2020-04-06 07:02:58 ... 1203 ELLSWORTH ST NaN NaN 39.936164 -75.163497 POINT (-8367162.212 4856670.199) 4 April 105.0 Passyunk Square
57 7744426 13066443 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-01-02 19:17:55 2020-01-04 05:46:06 ... 9054 WESLEYAN RD NaN NaN 40.058737 -75.018345 POINT (-8351004.015 4874481.442) 1 January 109.0 Pennypack Woods
58 7744427 13066540 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-01-03 07:01:46 2020-01-04 05:46:07 ... 2784 WILLITS RD NaN NaN 40.063658 -75.022347 POINT (-8351449.489 4875197.202) 1 January 107.0 Pennypack
175 7801094 13089345 Closed NaN Rubbish/Recyclable Material Collection SR-ST03 Streets Department 2 Business Days 2020-01-15 13:22:14 2020-01-16 14:03:29 ... 6137 LOCUST ST NaN NaN 39.958186 -75.244732 POINT (-8376205.240 4859867.796) 1 January 21.0 Cobbs Creek

5 rows × 21 columns

Note that this operation can be slow

Group by neighborhood and calculate the size:

In [56]:
totals = joined.groupby('ZillowName', as_index=False).size()
type(totals)
Out[56]:
pandas.core.frame.DataFrame

Note: we're once again using as_index=False to ensure the result of the .size() function is a DataFrame rather than a Series with ZillowName as its index

In [57]:
totals.head()
Out[57]:
ZillowName size
0 Academy Gardens 84
1 Allegheny West 330
2 Andorra 83
3 Aston Woodbridge 110
4 Bartram Village 35

Lastly, merge Zillow geometries (GeoDataFrame) with the total # of requests per neighborhood (DataFrame).

Important

When merging a GeoDataFrame (spatial) and DataFrame (non-spatial), you should always call the .merge() function of the spatial data set to ensure that the merged data is a GeoDataFrame.

For example...

In [58]:
totals = zillow.merge(totals, on='ZillowName')
In [59]:
totals.head()
Out[59]:
ZillowName geometry size
0 Academy Gardens POLYGON ((-8348795.677 4875297.327, -8348355.9... 84
1 Allegheny West POLYGON ((-8367432.106 4866417.820, -8367436.0... 330
2 Andorra POLYGON ((-8373967.120 4875663.024, -8374106.1... 83
3 Aston Woodbridge POLYGON ((-8349918.770 4873746.906, -8349919.8... 110
4 Bartram Village POLYGON ((-8372041.314 4856283.292, -8372041.6... 35
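
A quick check that merging from the spatial side preserved the spatial type (merging from the non-spatial side would have returned a plain pandas DataFrame):

# Should print: <class 'geopandas.geodataframe.GeoDataFrame'>
print(type(totals))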

Visualize as a choropleth map¶

Choropleth maps color polygon regions according to the values of a specific data attribute. They are built into GeoDataFrame objects.

First, plot the total number of requests per neighborhood.

In [60]:
# Create the figure/axes
fig, ax = plt.subplots(figsize=(8, 8))

# Plot
totals.plot(
    ax=ax, 
    column="size", # NEW
    edgecolor="white", 
    linewidth=0.5, 
    legend=True, 
    cmap="viridis"
)

# Format
ax.set_axis_off()
ax.set_aspect("equal")

Can we make the aesthetics better?¶

Yes!¶

  • Make the colorbar line up with the axes. The default configuration will always overshoot the axes.
  • Explicitly set the limits of the x-axis and y-axis to zoom in and center the map
In [61]:
# Needed to line up the colorbar properly
from mpl_toolkits.axes_grid1 import make_axes_locatable
In [62]:
# Create the figure
fig, ax = plt.subplots(figsize=(8, 8))

# NEW: Create a nice, lined up colorbar axes (called "cax" here)
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.2)

# Plot
totals.plot(
    ax=ax,
    cax=cax, # NEW 
    column="size",
    edgecolor="white",
    linewidth=0.5,
    legend=True,
    cmap="viridis",
)

# NEW: Get the limits of the GeoDataFrame
xmin, ymin, xmax, ymax = totals.total_bounds

# NEW: Set the xlims and ylims
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# Format
ax.set_axis_off()
ax.set_aspect("equal")

These improvements are optional, but they definitely make for nicer plots!

Can we classify the data into bins?¶

Yes, it's built into the plot() function!

Classification schemes¶

Many different schemes, but here are some of the most common ones:

  1. "Quantiles" : assigns the same number of data points per bin
  2. "EqualInterval" : divides the range of the data into equally sized bins
  3. "FisherJenks": scheme that tries to minimize the variance within each bin and maximize the variances between different bins.
In [63]:
# Quantiles Scheme
fig, ax = plt.subplots(figsize=(10, 7), facecolor="#cfcfcf")

totals.plot(
    ax=ax,
    column="size",
    edgecolor="white",
    linewidth=0.5,
    legend=True,
    legend_kwds=dict(loc="lower right"),
    cmap="Reds",
    scheme="Quantiles",
    k=5,
)
ax.set_title("Quantiles: k = 5")
ax.set_axis_off()
ax.set_aspect("equal")
In [64]:
## Equal Interval Scheme
fig, ax = plt.subplots(figsize=(10,7), facecolor='#cfcfcf')
totals.plot(
    ax=ax,
    column="size",
    edgecolor="white",
    linewidth=0.5,
    legend=True,
    legend_kwds=dict(loc='lower right'),
    cmap="Reds",
    scheme="EqualInterval",
    k=5 
) 
ax.set_title("Equal Interval: k = 5")
ax.set_axis_off()
ax.set_aspect("equal")
In [65]:
## Fisher Jenks Scheme
fig, ax = plt.subplots(figsize=(10,7), facecolor='#cfcfcf')
totals.plot(
    ax=ax,
    column="size",
    edgecolor="white",
    linewidth=0.5,
    legend=True,
    legend_kwds=dict(loc='lower right'),
    cmap="Reds",
    scheme="FisherJenks",
     
)
ax.set_title("Fisher Jenks: k = 5")
ax.set_axis_off()
ax.set_aspect("equal")

Documentation for classification schemes¶

The documentation can be found here: https://pysal.org/mapclassify/api.html

Contains the full list of schemes and the function definitions for each.
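
You can also run a classification scheme directly to inspect the bin edges. A sketch using mapclassify (the library geopandas relies on for the scheme= keyword):

import mapclassify

# Compute the quantile bin edges for the request counts
classifier = mapclassify.Quantiles(totals["size"], k=5)
print(classifier.bins)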

Neighborhood sizes still make it hard to compare raw counts¶

Better to normalize by area: use the .area attribute of the geometry series

In [66]:
# Normalize by area: .area is in square meters, since EPSG:3857 uses meters
# (Web Mercator distorts areas, so treat this as approximate)
totals['N_per_area'] = totals['size'] / (totals.geometry.area) * 1e4

Now plot the normalized totals:

In [67]:
# Create the figure
fig, ax = plt.subplots(figsize=(8, 8))

# NEW: Create a nice, lined up colorbar axes (called "cax" here)
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.2)

# Plot
totals.plot(
    ax=ax,
    cax=cax,
    column="N_per_area",
    edgecolor="white",
    linewidth=0.5,
    legend=True,
    cmap="viridis",
)

# NEW: Get the limits of the GeoDataFrame
xmin, ymin, xmax, ymax = totals.total_bounds

# NEW: Set the xlims and ylims
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# Format
ax.set_axis_off()
ax.set_aspect("equal")

Even smarter¶

Since households are driving the 311 requests, it would be even better to normalize by the number of properties in a given neighborhood rather than by neighborhood area.
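
A sketch of that idea, assuming a hypothetical point GeoDataFrame named properties (one row per property, in the same CRS as zillow):

# Count properties per neighborhood via a spatial join ("properties" is hypothetical)
props_joined = gpd.sjoin(properties, zillow, predicate="within", how="left")
prop_counts = props_joined.groupby("ZillowName", as_index=False).size()
prop_counts = prop_counts.rename(columns={"size": "n_properties"})

# Merge, then normalize the request counts by the property counts
totals = totals.merge(prop_counts, on="ZillowName")
totals["N_per_property"] = totals["size"] / totals["n_properties"]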

More advanced: hex bins¶

Hexagonal bins aggregate quantities over small spatial regions.

Use matplotlib's hexbin() function

In [68]:
# create the axes
fig, ax = plt.subplots(figsize=(12, 12))


# Extract out the x/y coordinates of the Point objects
xcoords = trash_requests.geometry.x
ycoords = trash_requests.geometry.y

# Plot a hexbin chart
hex_vals = ax.hexbin(xcoords, ycoords, gridsize=50)

# Add the zillow geometry boundaries
zillow.plot(ax=ax, facecolor="none", edgecolor="white", linewidth=0.25)


# add a colorbar and format
fig.colorbar(hex_vals, ax=ax)
ax.set_axis_off()
ax.set_aspect("equal")

Can we do better?¶

Yes! Let's add interactivity. We'll start with altair...

Altair has full support for GeoDataFrames, which makes interactive choropleths very easy to create!

In [69]:
import altair as alt
In [70]:
# IMPORTANT: Altair needs the GeoDataFrame to be in EPSG:4326
totals_4326 = totals.to_crs(epsg=4326)

# Plot the map; the data variables are nested within `properties`
(
    alt.Chart(totals_4326)
        .mark_geoshape(stroke="white")
        .encode(
            tooltip=["N_per_area:Q", "ZillowName:N", "size:Q"],
            color=alt.Color("N_per_area:Q", scale=alt.Scale(scheme="viridis")),
        ).properties(width=500, height=400)
)
Out[70]:

Challenge for later: use altair's repeated charts to show several choropleths for different 311 request types at once.

A similar example (using a different dataset) is available in the altair gallery.
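
A rough sketch of the repeat pattern, assuming a hypothetical GeoDataFrame wide_totals with one numeric column of counts per request type (you would need to build that first):

# "wide_totals" is hypothetical: one column of neighborhood counts per 311 request type
(
    alt.Chart(wide_totals)
    .mark_geoshape(stroke="white")
    .encode(color=alt.Color(alt.repeat("row"), type="quantitative"))
    .properties(width=300, height=250)
    .repeat(row=["Illegal Dumping", "Graffiti Removal"])
)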

Part 3 Exercise: property assessments in Philadelphia¶

Goals: Visualize the property assessment values by neighborhood in Philadelphia, using:

  1. static choropleth map
  2. hex bin map
  3. interactive choropleth with altair

Challenge (if time remaining): Visualize the highest-valued residential and commercial properties as points on top of a contextily basemap
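
A possible starting point for the challenge (a sketch, assuming contextily is installed, and using the gdata GeoDataFrame built in the steps below):

import contextily as cx

# Take the 10 highest-valued properties (gdata is created in Steps 1-3 below)
top10 = gdata.nlargest(10, "market_value")

# Plot the points on top of a web tile basemap
fig, ax = plt.subplots(figsize=(8, 8))
top10.plot(ax=ax, color="red")
cx.add_basemap(ax, crs=top10.crs)
ax.set_axis_off()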

Dataset¶

2019 property assessment data:

  • from OpenDataPhilly
  • residential properties only — over 460,000 properties

Step 1: Load the assessment data¶

In [71]:
data = pd.read_csv('./data/opa_residential.csv')
data.head()
Out[71]:
parcel_number lat lng location market_value building_value land_value total_land_area total_livable_area
0 71361800 39.991575 -75.128994 2726 A ST 62200.0 44473.0 17727.0 1109.69 1638.0
1 71362100 39.991702 -75.128978 2732 A ST 25200.0 18018.0 7182.0 1109.69 1638.0
2 71362200 39.991744 -75.128971 2734 A ST 62200.0 44473.0 17727.0 1109.69 1638.0
3 71362600 39.991994 -75.128895 2742 A ST 15500.0 11083.0 4417.0 1109.69 1638.0
4 71363800 39.992592 -75.128743 2814 A ST 31300.0 22400.0 8900.0 643.50 890.0

We'll focus on the market_value column for this analysis

Step 2: Convert to a GeoDataFrame¶

Remember to set the EPSG of the input data — this data is in the typical lat/lng coordinates (EPSG=4326)

In [72]:
data = data.dropna(subset=['lat', 'lng'])
In [73]:
data["Coordinates"] = gpd.points_from_xy(data["lng"], data["lat"])
In [74]:
gdata = gpd.GeoDataFrame(data, geometry="Coordinates", crs="EPSG:4326")
In [75]:
gdata.head()
Out[75]:
parcel_number lat lng location market_value building_value land_value total_land_area total_livable_area Coordinates
0 71361800 39.991575 -75.128994 2726 A ST 62200.0 44473.0 17727.0 1109.69 1638.0 POINT (-75.12899 39.99158)
1 71362100 39.991702 -75.128978 2732 A ST 25200.0 18018.0 7182.0 1109.69 1638.0 POINT (-75.12898 39.99170)
2 71362200 39.991744 -75.128971 2734 A ST 62200.0 44473.0 17727.0 1109.69 1638.0 POINT (-75.12897 39.99174)
3 71362600 39.991994 -75.128895 2742 A ST 15500.0 11083.0 4417.0 1109.69 1638.0 POINT (-75.12889 39.99199)
4 71363800 39.992592 -75.128743 2814 A ST 31300.0 22400.0 8900.0 643.50 890.0 POINT (-75.12874 39.99259)
In [76]:
# len(gdata)

Step 3: Do a spatial join with Zillow neighborhoods¶

Use the sjoin() function.

Make sure your CRSs match before doing the sjoin!

In [77]:
zillow.crs
Out[77]:
<Derived Projected CRS: EPSG:3857>
Name: WGS 84 / Pseudo-Mercator
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: World between 85.06°S and 85.06°N.
- bounds: (-180.0, -85.06, 180.0, 85.06)
Coordinate Operation:
- name: Popular Visualisation Pseudo-Mercator
- method: Popular Visualisation Pseudo Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [78]:
gdata = gdata.to_crs(epsg=3857)

# could also do:
# gdata = gdata.to_crs(zillow.crs)
In [79]:
gdata = gpd.sjoin(gdata, zillow, predicate='within', how='left')
In [80]:
gdata.head()
Out[80]:
parcel_number lat lng location market_value building_value land_value total_land_area total_livable_area Coordinates index_right ZillowName
0 71361800 39.991575 -75.128994 2726 A ST 62200.0 44473.0 17727.0 1109.69 1638.0 POINT (-8363321.403 4864718.063) 79.0 McGuire
1 71362100 39.991702 -75.128978 2732 A ST 25200.0 18018.0 7182.0 1109.69 1638.0 POINT (-8363319.628 4864736.535) 79.0 McGuire
2 71362200 39.991744 -75.128971 2734 A ST 62200.0 44473.0 17727.0 1109.69 1638.0 POINT (-8363318.762 4864742.658) 79.0 McGuire
3 71362600 39.991994 -75.128895 2742 A ST 15500.0 11083.0 4417.0 1109.69 1638.0 POINT (-8363310.304 4864778.984) 79.0 McGuire
4 71363800 39.992592 -75.128743 2814 A ST 31300.0 22400.0 8900.0 643.50 890.0 POINT (-8363293.378 4864865.767) 79.0 McGuire

Step 4: Make a choropleth of the median market value by neighborhood¶

Hints:

  • You will need to group by Zillow neighborhood
  • Calculate the median market value per neighborhood
  • Join with the Zillow neighborhood GeoDataFrame
In [81]:
median_values = gdata.groupby("ZillowName", as_index=False)["market_value"].median()
In [82]:
median_values.head()
Out[82]:
ZillowName market_value
0 Academy Gardens 185950.0
1 Allegheny West 34750.0
2 Andorra 251900.0
3 Aston Woodbridge 183800.0
4 Bartram Village 48300.0
In [83]:
# Merge the median values (a DataFrame) into the Zillow GeoDataFrame
# (call .merge() on the spatial dataset to keep the result spatial)
median_values = zillow.merge(median_values, on='ZillowName')

# Normalize to thousands of dollars
median_values['market_value'] /= 1e3 # in thousands
In [84]:
median_values.head()
Out[84]:
ZillowName geometry market_value
0 Academy Gardens POLYGON ((-8348795.677 4875297.327, -8348355.9... 185.95
1 Allegheny West POLYGON ((-8367432.106 4866417.820, -8367436.0... 34.75
2 Andorra POLYGON ((-8373967.120 4875663.024, -8374106.1... 251.90
3 Aston Woodbridge POLYGON ((-8349918.770 4873746.906, -8349919.8... 183.80
4 Bartram Village POLYGON ((-8372041.314 4856283.292, -8372041.6... 48.30
In [85]:
# Create the figure
fig, ax = plt.subplots(figsize=(8, 8))

# Create a nice, lined up colorbar axes (called "cax" here)
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.2)

# Plot
X = median_values.to_crs(epsg=2272)
X.plot(
     ax=ax, 
     cax=cax, 
     column="market_value", 
     edgecolor="white", 
     linewidth=0.5, 
     legend=True
 )

# Get the limits of the GeoDataFrame
xmin, ymin, xmax, ymax = X.total_bounds

# Set the xlims and ylims
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# Format
ax.set_axis_off()
ax.set_aspect("equal")

# Format cax labels
cax.set_yticklabels([f"${val:.0f}k" for val in cax.get_yticks()]);
/var/folders/49/ntrr94q12xd4rq8hqdnx96gm0000gn/T/ipykernel_40811/3747993766.py:31: UserWarning: FixedFormatter should only be used together with FixedLocator
  cax.set_yticklabels([f"${val:.0f}k" for val in cax.get_yticks()]);

Step 5: Make a hex bin map of median assessments¶

Hints:

  • You will need to use the C and reduce_C_function keywords of the hexbin() function
  • Run plt.hexbin? for more help
  • Try testing the impact of setting bins='log' on the resulting map

Note: you should pass in the raw point data rather than any aggregated data to the hexbin() function

In [86]:
# Create the axes
fig, ax = plt.subplots(figsize=(10, 8))

# Use the .x and .y attributes
# NOTE: we are passing in the raw point values here! 
# Matplotlib is doing the binning and aggregation work for us!
xcoords = gdata.geometry.x
ycoords = gdata.geometry.y
hex_vals = ax.hexbin(
    xcoords, 
    ycoords,
    C=gdata.market_value / 1e3,
    reduce_C_function=np.median,
    bins="log",
    gridsize=30,
)

# Add the zillow geometry boundaries
zillow.plot(ax=ax, facecolor="none", edgecolor="white", linewidth=1, alpha=0.5)

# Add a colorbar and format
cbar = fig.colorbar(hex_vals, ax=ax)
ax.set_axis_off()
ax.set_aspect("equal")

# Format cbar labels
cbar.set_ticks([100, 1000])
cbar.set_ticklabels(["$100k", "$1M"]);

Step 6: Use altair to make an interactive choropleth¶

In [87]:
# Convert median values to EPSG=4326
median_values_4326 = median_values.to_crs(epsg=4326)

# Plot the map
chart = (
    alt.Chart(median_values_4326)
    .mark_geoshape(stroke="white")
    .encode(
        tooltip=["market_value:Q", "ZillowName:N"],
        color=alt.Color("market_value:Q", scale=alt.Scale(scheme='viridis'))
    ).properties(width=500, height=400)
)

chart
Out[87]:

That's it!¶

  • More interactive viz libraries and raster datasets next week!
  • See you next Monday!