Week 7
Analyzing and Visualizing Large Datasets

Oct 19, 2022

This week's agenda: working with big data

By example:

  • Open Street Map data
  • Census data
  • NYC taxi cab trips
In [1]:
# Initial imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd

Use intake to load the dataset instructions for this week

In [2]:
import intake
In [3]:
datasets = intake.open_catalog("./datasets.yml")
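
To peek inside the catalog, an opened intake catalog behaves like a mapping of named data sources. A quick sketch (the available entry names are whatever datasets.yml defines):

# List the entries defined in datasets.yml
list(datasets)

# Inspect the metadata for a single entry, e.g., the census source
datasets.census.describe()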

Import datashader and related modules:

In [4]:
# Datashader imports
import datashader as ds
import datashader.transfer_functions as tf
In [5]:
# Color-related imports
from datashader.colors import Greys9, viridis, inferno
from colorcet import fire

Load the Census data into a dask DataFrame

In [6]:
# Load the data
# REMEMBER: this will take some time to download the first time
census_ddf = datasets.census.to_dask()
In [7]:
census_ddf
Out[7]:
Dask DataFrame Structure:
                easting  northing               race
npartitions=36
                float32   float32  category[unknown]
...                 ...       ...                ...
Dask Name: read-parquet, 36 tasks
In [8]:
census_ddf.head()
Out[8]:
easting northing race
0 -12418767.0 3697425.00 h
1 -12418512.0 3697143.50 h
2 -12418245.0 3697584.50 h
3 -12417703.0 3697636.75 w
4 -12418120.0 3697129.25 h

Set up canvas parameters for the USA image:

In [9]:
from datashader.utils import lnglat_to_meters
In [10]:
# Lat/lng bounding box for the continental U.S.
# NOTE: these are in lat/lng, so EPSG=4326
USA = [(-124.72, -66.95), (23.55, 50.06)]

# Get USA xlim and ylim in meters (EPSG=3857)
USA_xlim_meters, USA_ylim_meters = [list(r) for r in lnglat_to_meters(USA[0], USA[1])]
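
Under the hood, lnglat_to_meters applies the standard Web Mercator transform. A minimal sketch of the math (our own illustration for intuition, not datashader's exact implementation):

import numpy as np

def web_mercator_sketch(longitude, latitude):
    """Approximate EPSG=4326 -> EPSG=3857 conversion."""
    origin_shift = np.pi * 6378137  # half of Earth's circumference, in meters
    x = np.asarray(longitude) * origin_shift / 180.0
    y = np.log(np.tan((90.0 + np.asarray(latitude)) * np.pi / 360.0)) * origin_shift / np.pi
    return x, y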
In [11]:
# Define a default plot width & height
plot_width  = 900
plot_height = int(plot_width*7.0/12)

Use a custom color scheme to map racial demographics:

In [12]:
color_key = {"w": "aqua", "b": "lime", "a": "red", "h": "fuchsia", "o": "yellow"}

Can we learn more than just population density and race?

We can use xarray to slice the array of aggregated pixel values to examine specific aspects of the data.
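
For intuition, here is the same slicing pattern on a tiny hand-made array (hypothetical data, not the census aggregate):

import numpy as np
import xarray as xr

# A 2x2 grid of counts for two hypothetical categories
toy = xr.DataArray(
    np.array([[[1, 0], [2, 3]],
              [[0, 0], [4, 5]]]),
    dims=("y", "x", "race"),
    coords={"race": ["w", "b"]},
)

toy.sel(race="b")                     # slice out one category
(toy > 0).all(dim="race")             # True where every category is non-zero
toy.where((toy > 0).all(dim="race"))  # mask out pixels that fail the test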

Question #1: Where do African Americans live?

Use the sel() function of the xarray DataArray

In [13]:
# Step 1: Setup canvas
cvs = ds.Canvas(plot_width=plot_width, plot_height=plot_height)

# Step 2: Aggregate and count race category
aggc = cvs.points(census_ddf, "easting", "northing", agg=ds.count_cat("race"))

# NEW: Select only African Americans (where "race" column is equal to "b")
agg_b = aggc.sel(race="b")

# Step 3: Shade and set background
img = tf.shade(agg_b, cmap=fire, how="eq_hist")
img = tf.set_background(img, "black")

img
Out[13]:

Question #2: How to identify diverse areas?

Goal: Select pixels where each race has a non-zero count.

In [14]:
(aggc.sel(race=['w', 'b', 'a', 'h']) > 0).all(dim='race')
Out[14]:
<xarray.DataArray (northing: 525, easting: 900)>
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
Coordinates:
  * easting   (easting) float64 -1.388e+07 -1.387e+07 ... -7.464e+06 -7.457e+06
  * northing  (northing) float64 2.822e+06 2.828e+06 ... 6.326e+06 6.333e+06
In [15]:
# Do a "logical and" operation across the "race" dimension
# Pixels will be "True" if the pixel has a positive count for each race
diverse_selection = (aggc.sel(race=['w', 'b', 'a', 'h']) > 0).all(dim='race')

diverse_selection
Out[15]:
<xarray.DataArray (northing: 525, easting: 900)>
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
Coordinates:
  * easting   (easting) float64 -1.388e+07 -1.387e+07 ... -7.464e+06 -7.457e+06
  * northing  (northing) float64 2.822e+06 2.828e+06 ... 6.326e+06 6.333e+06
In [16]:
# Select the pixel values where our diverse selection criterion is True
agg2 = aggc.where(diverse_selection).fillna(0)

# and shade using our color key
img = tf.shade(agg2, color_key=color_key)
img = tf.set_background(img,"black")

img 
Out[16]:

Question #3: Where is the African American population greater than the White population?

In [17]:
# Select where the "b" race dimension is greater than the "w" race dimension
selection = aggc.sel(race='b') > aggc.sel(race='w') 

selection
Out[17]:
<xarray.DataArray (northing: 525, easting: 900)>
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
Coordinates:
  * easting   (easting) float64 -1.388e+07 -1.387e+07 ... -7.464e+06 -7.457e+06
  * northing  (northing) float64 2.822e+06 2.828e+06 ... 6.326e+06 6.333e+06
In [18]:
# Select based on the selection criteria
agg3 = aggc.where(selection).fillna(0)

img = tf.shade(agg3, color_key=color_key)
img = tf.set_background(img, "black")

img
Out[18]:

Now let's make it interactive!

Let's use hvplot

In [19]:
# Initialize hvplot and dask
import hvplot.pandas
import hvplot.dask # NEW: dask works with hvplot too!

import holoviews as hv
import geoviews as gv

Side note: persisting dask DataFrames in memory

To speed up interactive calculations, you can "persist" a dask DataFrame in memory (load the data fully into memory). You should have at least 16 GB of memory to avoid memory errors, though!

If not persisted, the data will be loaded on demand to avoid memory issues, which will make the interactive plots slightly slower.

In [20]:
# UNCOMMENT THIS LINE IF YOU HAVE AT LEAST 16 GB OF MEMORY
# census_ddf = census_ddf.persist()
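
One way to decide whether persisting is safe is to check the machine's total memory first. A sketch using the psutil package (an extra dependency, not used elsewhere in this notebook):

import psutil

# Persist only if the machine has at least 16 GB of RAM
if psutil.virtual_memory().total >= 16 * 1024**3:
    census_ddf = census_ddf.persist()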
In [22]:
census_ddf
Out[22]:
Dask DataFrame Structure:
                easting  northing               race
npartitions=36
                float32   float32  category[unknown]
...                 ...       ...                ...
Dask Name: read-parquet, 36 tasks
In [23]:
# Plot the points
points = census_ddf.hvplot.points(
    x="easting",
    y="northing",
    datashade=True, # NEW: tell hvplot to use datashader!
    aggregator=ds.count(),
    cmap=fire,
    geo=True,
    crs=3857, # Input data is in 3857, so we need to tell hvplot
    frame_width=plot_width,
    frame_height=plot_height,
    xlim=USA_xlim_meters, # NEW: Specify the xbounds in meters (EPSG=3857)
    ylim=USA_ylim_meters, # NEW: Specify the ybounds in meters (EPSG=3857)
)

# Put a tile source behind it
bg = gv.tile_sources.CartoDark

bg * points
Out[23]:

Note: interactive features (panning, zooming, etc.) can be slow, but the map will eventually re-load!

We can visualize color-coded race interactively as well

Similar syntax to previous examples...

In [24]:
# Points with categorical colormap
race_map = census_ddf.hvplot.points(
    x="easting",
    y="northing",
    datashade=True,
    c="race",  # NEW: color pixels by "race" column
    aggregator=ds.count_cat("race"),  # NEW: specify the aggregator
    cmap=color_key,  # NEW: use our custom color map dictionary
    crs=3857,
    geo=True,
    frame_width=plot_width,
    frame_height=plot_height,
    xlim=USA_xlim_meters,
    ylim=USA_ylim_meters,
)

bg = gv.tile_sources.CartoDark

bg * race_map
Out[24]:

Use case: exploring gerrymandering

We can easily overlay Congressional districts on our map...

In [25]:
# Load congressional districts and convert to EPSG=3857
districts = gpd.read_file('./data/cb_2015_us_cd114_5m').to_crs(epsg=3857)
In [26]:
# Plot the district map
districts_map = districts.hvplot.polygons(
    geo=True,
    crs=3857,
    line_color="white",
    fill_alpha=0,
    frame_width=plot_width,
    frame_height=plot_height,
    xlim=USA_xlim_meters,
    ylim=USA_ylim_meters,
)

bg * districts_map
Out[26]:
In [27]:
# Combine the background, race map, and districts into a single map
img = bg * race_map * districts_map

img
Out[27]:

Example 3: NYC taxi data

12 million taxi trips from 2015...

In [28]:
# Load from our intake catalog
# Remember: this will take some time to download the first time!
taxi_ddf = datasets.nyc_taxi_wide.to_dask()
In [29]:
taxi_ddf
Out[29]:
Dask DataFrame Structure:
tpep_pickup_datetime tpep_dropoff_datetime passenger_count trip_distance pickup_x pickup_y dropoff_x dropoff_y fare_amount tip_amount dropoff_hour pickup_hour
npartitions=1
datetime64[ns] datetime64[ns] uint8 float32 float32 float32 float32 float32 float32 float32 uint8 uint8
... ... ... ... ... ... ... ... ... ... ... ...
Dask Name: read-parquet, 1 tasks
In [30]:
print(f"{len(taxi_ddf)} Rows")
print(f"Columns: {list(taxi_ddf.columns)}")
11842094 Rows
Columns: ['tpep_pickup_datetime', 'tpep_dropoff_datetime', 'passenger_count', 'trip_distance', 'pickup_x', 'pickup_y', 'dropoff_x', 'dropoff_y', 'fare_amount', 'tip_amount', 'dropoff_hour', 'pickup_hour']
In [31]:
taxi_ddf.head()
Out[31]:
tpep_pickup_datetime tpep_dropoff_datetime passenger_count trip_distance pickup_x pickup_y dropoff_x dropoff_y fare_amount tip_amount dropoff_hour pickup_hour
0 2015-01-15 19:05:39 2015-01-15 19:23:42 1 1.59 -8236963.0 4975552.5 -8234835.5 4975627.0 12.0 3.25 19 19
1 2015-01-10 20:33:38 2015-01-10 20:53:28 1 3.30 -8237826.0 4971752.5 -8237020.5 4976875.0 14.5 2.00 20 20
2 2015-01-10 20:33:38 2015-01-10 20:43:41 1 1.80 -8233561.5 4983296.5 -8232279.0 4986477.0 9.5 0.00 20 20
3 2015-01-10 20:33:39 2015-01-10 20:35:31 1 0.50 -8238654.0 4970221.0 -8238124.0 4971127.0 3.5 0.00 20 20
4 2015-01-10 20:33:39 2015-01-10 20:52:58 1 3.00 -8234433.5 4977363.0 -8238107.5 4974457.0 15.0 0.00 20 20
In [32]:
# Trim to only the columns we need
taxi_ddf = taxi_ddf[
    [
        "passenger_count",
        "pickup_x",
        "pickup_y",
        "dropoff_x",
        "dropoff_y",
        "dropoff_hour",
        "pickup_hour",
    ]
]

Exploring the taxi pickups...

In [33]:
pickups_map = taxi_ddf.hvplot.points(
    x="pickup_x",
    y="pickup_y",
    cmap=fire,
    datashade=True,
    frame_width=800,
    frame_height=600,
    geo=True, 
    crs=3857
)

gv.tile_sources.CartoDark * pickups_map
Out[33]:

Group by the hour column to add a slider widget:

In [34]:
pickups_map = taxi_ddf.hvplot.points(
    x="pickup_x",
    y="pickup_y",
    groupby="pickup_hour",
    cmap=fire,
    datashade=True,
    frame_width=800,
    frame_height=600,
    geo=True, 
    crs=3857
)

gv.tile_sources.CartoDark * pickups_map
Out[34]:

Example: Comparing pickups and dropoffs

  • Pixels with more pickups: shaded red
  • Pixels with more dropoffs: shaded blue
In [35]:
# Bounds in meters for NYC (EPSG=3857)
NYC = [(-8242000,-8210000), (4965000,4990000)]

# Set a plot width and height
plot_width  = 750
plot_height = int(plot_width // 1.2)
In [36]:
def create_merged_taxi_image(
    x_range, y_range, w=plot_width, h=plot_height, how="eq_hist"
):
    """
    Create a merged taxi image, showing areas with: 
    
    - More pickups than dropoffs in red
    - More dropoffs than pickups in blue
    """
    # Step 1: Create the canvas
    cvs = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)

    # Step 2: Aggregate the pick ups
    picks = cvs.points(taxi_ddf, "pickup_x", "pickup_y", ds.count("passenger_count"))

    # Step 2: Aggregate the drop offs
    drops = cvs.points(taxi_ddf, "dropoff_x", "dropoff_y", ds.count("passenger_count"))

    # Rename the coordinate dimensions to common names (x, y)
    drops = drops.rename({"dropoff_x": "x", "dropoff_y": "y"})
    picks = picks.rename({"pickup_x": "x", "pickup_y": "y"})

    # Step 3: Shade
    # NEW: shade pixels where there are more drop offs than pick ups
    # These are colored blue
    more_drops = tf.shade(
        drops.where(drops > picks), cmap=["darkblue", "cornflowerblue"], how=how
    )

    # Step 3: Shade
    # NEW: shade pixels where there are more pick ups than drop offs
    # These are colored red
    more_picks = tf.shade(
        picks.where(picks > drops), cmap=["darkred", "orangered"], how=how
    )

    # Step 4: Combine
    # NEW: add the images together!
    img = tf.stack(more_picks, more_drops)

    return tf.set_background(img, "black")
In [37]:
create_merged_taxi_image(NYC[0], NYC[1])
Out[37]:

Takeaway: pickups occur more often on major roads, and dropoffs on smaller roads

Let's generate a time-lapse GIF of drop-offs over time

Powerful tool for visualizing trends over time

Define some functions...

Important: We can convert our datashaded images to Python Imaging Library (PIL) format so we can visualize them with matplotlib

In [38]:
def create_taxi_image(df, x_range, y_range, w=plot_width, h=plot_height, cmap=fire):
    """Create an image of taxi dropoffs, returning a Python Imaging Library (PIL) image."""
    
    # Step 1: Create the canvas
    cvs = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    
    # Step 2: Aggregate the dropoff positions, counting the number of passengers
    agg = cvs.points(df, 'dropoff_x', 'dropoff_y',  ds.count('passenger_count'))
    
    # Step 3: Shade
    img = tf.shade(agg, cmap=cmap, how='eq_hist')
    
    # Set the background
    img = tf.set_background(img, "black")
    
    # NEW: return a PIL image
    return img.to_pil()
In [39]:
def convert_to_12hour(hr24):
    """Convert from 24 hr to 12 hr."""
    from datetime import datetime
    d = datetime.strptime(str(hr24), "%H")
    return d.strftime("%I %p")
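
For example, a quick check of the helper above:

convert_to_12hour(0)   # '12 AM'
convert_to_12hour(19)  # '07 PM'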
In [40]:
def plot_dropoffs_by_hour(fig, data_all_hours, hour, x_range, y_range):
    """Plot the dropoffs for particular hour."""
    
    # Trim to the specific hour
    df_this_hour = data_all_hours.loc[data_all_hours["dropoff_hour"] == hour]

    # Create the datashaded image for this hour
    img = create_taxi_image(df_this_hour, x_range, y_range)

    # Plot the image on a matplotlib axes
    # Use imshow()
    plt.clf()
    ax = fig.gca()
    ax.imshow(img, extent=[x_range[0], x_range[1], y_range[0], y_range[1]])
    
    # Format the axis and figure
    ax.set_aspect("equal")
    ax.set_axis_off()
    fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
    fig.tight_layout()

    # Optional: Add a text label for the hour
    ax.text(
        0.05,
        0.9,
        convert_to_12hour(hour),
        color="white",
        fontsize=40,
        ha="left",
        transform=ax.transAxes,
    )

    # Draw the figure and return the image
    # This converts our matplotlib Figure into a format readable by imageio
    fig.canvas.draw()
    image = np.frombuffer(fig.canvas.tostring_rgb(), dtype="uint8")
    image = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))

    return image

Strategy:

  1. Create a datashaded image for each hour of taxi dropoffs, returned as a PIL image object
  2. Use matplotlib's imshow() to plot each datashaded image on a matplotlib Figure
  3. Convert each matplotlib Figure into a format readable by the imageio library
  4. Combine the images for each hour into a GIF using the imageio library
In [41]:
import imageio
In [42]:
# Create a figure
fig, ax = plt.subplots(figsize=(10, 10), facecolor="black")

# Create an image for each hour
imgs = []
for hour in range(24):

    # Plot the datashaded image for this specific hour
    print(hour)
    img = plot_dropoffs_by_hour(fig, taxi_ddf, hour, x_range=NYC[0], y_range=NYC[1])
    imgs.append(img)


# Combine the images for each hour into a single GIF
imageio.mimsave("dropoffs.gif", imgs, fps=1);
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
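
To preview the resulting GIF inline in the notebook, one option is IPython's display tools (a minimal sketch):

from IPython.display import Image

# Display the animated GIF we just wrote to disk
Image(filename="dropoffs.gif")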

Interesting aside: Beyond hvplot

Analyzing hourly and weekly trends for taxis using holoviews

  • We'll load taxi data from 2016 that includes the number of pickups per hour.
  • Visualize weekly and hourly trends using a radial heatmap
In [43]:
df = pd.read_csv('./data/nyc_taxi_2016_by_hour.csv.gz', parse_dates=['Pickup_date'])
In [44]:
df.head()
Out[44]:
Pickup_date Pickup_Count
0 2016-01-01 00:00:00 19865
1 2016-01-01 01:00:00 24376
2 2016-01-01 02:00:00 24177
3 2016-01-01 03:00:00 21538
4 2016-01-01 04:00:00 15665

Add date-related columns with strftime

We can use the strftime() function of a datetime Series to extract specific aspects of a date object. In this case, we are interested in:

  • Day of Week (Monday, Tuesday, etc.) and hour of day
  • Week of Year

To find the specific string notation for these, use: http://strftime.org

In [45]:
# create relevant time columns
df["Day & Hour"] = df["Pickup_date"].dt.strftime("%A %H:00")
df["Week of Year"] = df["Pickup_date"].dt.strftime("Week %W")
df["Date"] = df["Pickup_date"].dt.strftime("%Y-%m-%d")
In [46]:
df.head()
Out[46]:
Pickup_date Pickup_Count Day & Hour Week of Year Date
0 2016-01-01 00:00:00 19865 Friday 00:00 Week 00 2016-01-01
1 2016-01-01 01:00:00 24376 Friday 01:00 Week 00 2016-01-01
2 2016-01-01 02:00:00 24177 Friday 02:00 Week 00 2016-01-01
3 2016-01-01 03:00:00 21538 Friday 03:00 Week 00 2016-01-01
4 2016-01-01 04:00:00 15665 Friday 04:00 Week 00 2016-01-01

Let's plot a radial heatmap

The binning dimensions of the heat map will be:

  • Day of Week and Hour of Day
  • Week of Year

A radial heatmap can be read similarly to tree rings:

  • The center of the heatmap will represent the first week of the year, while the outer edge is the last week of the year
  • Rotating clockwise along a specific ring tells you the day/hour.
In [47]:
# Create a HoloViews HeatMap element
cols = ["Day & Hour", "Week of Year", "Pickup_Count", "Date"]
heatmap = hv.HeatMap(
    df[cols], kdims=["Day & Hour", "Week of Year"], vdims=["Pickup_Count", "Date"]
)
In [48]:
heatmap.opts(
    radial=True,
    height=600,
    width=600,
    yticks=None,
    xmarks=7,
    ymarks=3,
    start_angle=np.pi * 19 / 14,
    xticks=(
        "Friday",
        "Saturday",
        "Sunday",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
    ),
    tools=["hover"],
    cmap="fire"
)
Out[48]:

Trends

  • Taxi pickup counts are high between 7–9am and 5–10pm on weekdays, matching business hours as expected. In contrast, on weekends there is not much going on until about 11am.
  • Friday and Saturday nights clearly stand out with the highest pickup densities, as expected.
  • Public holidays can be easily identified. For example, taxi pickup counts are comparatively low around Christmas and Thanksgiving.
  • Weather phenomena also influence taxi service. There is a very dark stripe near the beginning of the year, from Saturday the 23rd through Sunday the 24th of January, which coincides with one of the biggest blizzards in the history of NYC.

Useful reference: the HoloViews example gallery

This radial heatmap example, and many more examples beyond hvplot, are available at:

https://holoviews.org/gallery/index.html

Exercise: Datashading Philly parking violations data

Download the data

  • A (large) CSV of parking violation data is available for download at: https://musa550.s3.amazonaws.com/parking_violations.csv

  • Open the above URL in your browser and download the data

  • The data is from Open Data Philly: https://www.opendataphilly.org/dataset/parking-violations

  • Input data is in EPSG=4326

  • Remember: You will need to convert latitude/longitude to Web Mercator (epsg=3857) to work with datashader.

Step 1: Use dask to load the data

  • The dask.dataframe module includes a read_csv() function just like pandas
  • You'll want to specify the assume_missing=True keyword for that function: that will let dask know that some columns are allowed to have missing values
In [49]:
import dask.dataframe as dd
In [50]:
# I downloaded the data and moved it to the "data/" folder of my week-7 repository locally
df = dd.read_csv("data/parking_violations.csv", assume_missing=True)
In [51]:
df
Out[51]:
Dask DataFrame Structure:
                   lon      lat
npartitions=5
               float64  float64
...                ...      ...
Dask Name: read-csv, 5 tasks
In [52]:
len(df)
Out[52]:
9412858
In [53]:
df.head()
Out[53]:
lon lat
0 -75.158937 39.956252
1 -75.154730 39.955233
2 -75.172386 40.034175
3 NaN NaN
4 -75.157291 39.952661

Step 2: Remove any rows with missing geometries

Remove rows that have NaN for either the lat or lon columns (hint: use the dropna() function!)

In [54]:
df = df.dropna()
In [55]:
df
Out[55]:
Dask DataFrame Structure:
                   lon      lat
npartitions=5
               float64  float64
...                ...      ...
Dask Name: dropna, 10 tasks
In [56]:
len(df)
Out[56]:
8659655

Step 3: Convert lat/lng to Web Mercator coordinates (x, y)

Add two new columns, x and y, that represent the coordinates in the EPSG=3857 CRS.

Hint: Use datashader's lnglat_to_meters() function.

In [57]:
from datashader.utils import lnglat_to_meters
In [58]:
# Do the conversion
x, y = lnglat_to_meters(df['lon'], df['lat'])
In [59]:
# Add as columns
df['x'] = x
df['y'] = y
In [60]:
df.head()
Out[60]:
lon lat x y
0 -75.158937 39.956252 -8.366655e+06 4.859587e+06
1 -75.154730 39.955233 -8.366186e+06 4.859439e+06
2 -75.172386 40.034175 -8.368152e+06 4.870910e+06
4 -75.157291 39.952661 -8.366471e+06 4.859066e+06
5 -75.162902 39.959713 -8.367096e+06 4.860090e+06
In [61]:
df
Out[61]:
Dask DataFrame Structure:
                   lon      lat        x        y
npartitions=5
               float64  float64  float64  float64
...                ...      ...      ...      ...
Dask Name: assign, 75 tasks

Step 4: Get the x/y range for Philadelphia for our canvas

  • Convert the lat/lng bounding box into Web Mercator EPSG=3857
  • Use the lnglat_to_meters() function to do the conversion
  • You should have two variables x_range and y_range that give you the corresponding x and y bounds
In [62]:
# Use lat/lng bounds for Philly
# This will exclude any points that fall outside this region
PhillyBounds = [( -75.28,  -74.96), (39.86, 40.14)]
In [63]:
PhillyBoundsLng = PhillyBounds[0]
PhillyBoundsLat = PhillyBounds[1]
In [64]:
# Convert to EPSG=3857
x_range, y_range = lnglat_to_meters(PhillyBoundsLng, PhillyBoundsLat)

x_range
Out[64]:
array([-8380131.26691764, -8344509.02986379])
In [65]:
# Make sure these are lists as opposed to arrays
x_range = list(x_range)
y_range = list(y_range)
In [66]:
x_range
Out[66]:
[-8380131.266917636, -8344509.029863787]

Step 5: Datashade the dataset

In [67]:
# Create the canvas
cvs = ds.Canvas(plot_width=600, plot_height=600, x_range=x_range, y_range=y_range)

# Aggregate the points
agg = cvs.points(df, "x", "y", agg=ds.count())

# Shade the aggregated pixels
img = tf.shade(agg, cmap=fire, how="eq_hist")

# Set the background of the image
img = tf.set_background(img, "black")

# Show!
img
Out[67]:

Let's add the city limits:

In [68]:
# Load
city_limits = gpd.read_file("./data/City_Limits.geojson")

# Same CRS!
city_limits = city_limits.to_crs(epsg=3857)
In [69]:
# Show with matplotlib
fig, ax = plt.subplots(figsize=(10,10))
ax.imshow(img.to_pil(), extent=[x_range[0], x_range[1], y_range[0], y_range[1]])

# Format
ax.set_aspect("equal")
ax.set_axis_off()

# Add the city limits on top!
city_limits.plot(ax=ax, facecolor='none', edgecolor='white', linewidth=2)
Out[69]:
<AxesSubplot:>

Step 6: Make an interactive map

In [70]:
violations = df.hvplot.points(
    x="x",
    y="y",
    datashade=True,
    dynspread=False,
    geo=True,
    crs=3857,
    frame_width=600,
    frame_height=600,
    cmap=fire,
    xlim=x_range,
    ylim=y_range,
)


gv.tile_sources.CartoDark * violations
Out[70]:

That's it!

  • Next week: advanced raster data analysis
  • See you on Monday!