Oct 26, 2022
Two case studies:
Initialize our packages:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import xarray as xr
import hvplot.xarray
import hvplot.pandas
import holoviews as hv
import geoviews as gv
from geoviews.tile_sources import EsriImagery
Create the intake data catalog so we can load our datasets:
url = 'https://raw.githubusercontent.com/pyviz-topics/EarthML/master/examples/catalog.yml'
cat = intake.open_catalog(url)
band_info = pd.DataFrame([
(1, "Aerosol", " 0.43 - 0.45", 0.440, "30", "Coastal aerosol"),
(2, "Blue", " 0.45 - 0.51", 0.480, "30", "Blue"),
(3, "Green", " 0.53 - 0.59", 0.560, "30", "Green"),
(4, "Red", " 0.64 - 0.67", 0.655, "30", "Red"),
(5, "NIR", " 0.85 - 0.88", 0.865, "30", "Near Infrared (NIR)"),
(6, "SWIR1", " 1.57 - 1.65", 1.610, "30", "Shortwave Infrared (SWIR) 1"),
(7, "SWIR2", " 2.11 - 2.29", 2.200, "30", "Shortwave Infrared (SWIR) 2"),
(8, "Panc", " 0.50 - 0.68", 0.590, "15", "Panchromatic"),
(9, "Cirrus", " 1.36 - 1.38", 1.370, "30", "Cirrus"),
(10, "TIRS1", "10.60 - 11.19", 10.895, "100 * (30)", "Thermal Infrared (TIRS) 1"),
(11, "TIRS2", "11.50 - 12.51", 12.005, "100 * (30)", "Thermal Infrared (TIRS) 2")],
columns=['Band', 'Name', 'Wavelength Range (µm)', 'Nominal Wavelength (µm)', 'Resolution (m)', 'Description']).set_index(["Band"])
band_info
| Band | Name | Wavelength Range (µm) | Nominal Wavelength (µm) | Resolution (m) | Description |
|---|---|---|---|---|---|
| 1 | Aerosol | 0.43 - 0.45 | 0.440 | 30 | Coastal aerosol |
| 2 | Blue | 0.45 - 0.51 | 0.480 | 30 | Blue |
| 3 | Green | 0.53 - 0.59 | 0.560 | 30 | Green |
| 4 | Red | 0.64 - 0.67 | 0.655 | 30 | Red |
| 5 | NIR | 0.85 - 0.88 | 0.865 | 30 | Near Infrared (NIR) |
| 6 | SWIR1 | 1.57 - 1.65 | 1.610 | 30 | Shortwave Infrared (SWIR) 1 |
| 7 | SWIR2 | 2.11 - 2.29 | 2.200 | 30 | Shortwave Infrared (SWIR) 2 |
| 8 | Panc | 0.50 - 0.68 | 0.590 | 15 | Panchromatic |
| 9 | Cirrus | 1.36 - 1.38 | 1.370 | 30 | Cirrus |
| 10 | TIRS1 | 10.60 - 11.19 | 10.895 | 100 * (30) | Thermal Infrared (TIRS) 1 |
| 11 | TIRS2 | 11.50 - 12.51 | 12.005 | 100 * (30) | Thermal Infrared (TIRS) 2 |
We'll be downloading publicly available Landsat data from Google Cloud Storage
See: https://cloud.google.com/storage/docs/public-datasets/landsat
The relevant information is stored in our intake catalog:
From our catalog.yml file:
google_landsat_band:
description: Landsat bands from Google Cloud Storage
driver: rasterio
parameters:
path:
description: landsat path
type: int
row:
description: landsat row
type: int
product_id:
description: landsat file id
type: str
band:
description: band
type: int
args:
urlpath: https://storage.googleapis.com/gcp-public-data-landsat/LC08/01/{{ '%03d' % path }}/{{ '%03d' % row }}/{{ product_id }}/{{ product_id }}_B{{ band }}.TIF
chunks:
band: 1
x: 256
y: 256
From the "urlpath" above, you can see we need "path", "row", and "product_id" variables to properly identify a Landsat image:
The path and row corresponding to the area over Philadelphia have already been selected using the USGS Earth Explorer. The same tool was also used to find the product ID for the particular date of interest.
# Necessary variables
path = 14
row = 32
product_id = 'LC08_L1TP_014032_20160727_20170222_01_T1'
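As a sanity check, the catalog's `urlpath` template from above can be expanded by hand for these variables (band 4 shown here; the URL pattern is the one from the catalog, not something to rely on beyond this scene):

```python
# Expand the catalog's urlpath template by hand for our scene (band 4)
path = 14
row = 32
product_id = "LC08_L1TP_014032_20160727_20170222_01_T1"
band = 4

url = (
    "https://storage.googleapis.com/gcp-public-data-landsat/LC08/01/"
    f"{path:03d}/{row:03d}/{product_id}/{product_id}_B{band}.TIF"
)
print(url)
```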
This will return a specific Landsat band as a dask array.
from random import random
from time import sleep
def get_band_with_exponential_backoff(path, row, product_id, band, maximum_backoff=32):
    """
    Google Cloud Storage recommends using exponential backoff
    when accessing the API.

    https://cloud.google.com/storage/docs/exponential-backoff
    """
    n = backoff = 0
    while backoff < maximum_backoff:
        try:
            return cat.google_landsat_band(
                path=path, row=row, product_id=product_id, band=band
            ).to_dask()
        except Exception:  # retry on any failure, with jittered exponential backoff
            backoff = min(2 ** n + random(), maximum_backoff)
            sleep(backoff)
            n += 1
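To see the retry schedule this function produces, here is a sketch of just the backoff values (no network calls, no sleeping), assuming the same `maximum_backoff=32`:

```python
from random import random

# Reproduce only the wait-time schedule from the function above
maximum_backoff = 32
waits = []
n = backoff = 0
while backoff < maximum_backoff:
    backoff = min(2 ** n + random(), maximum_backoff)
    waits.append(backoff)
    n += 1

print(waits)  # roughly [1.x, 2.x, 4.x, 8.x, 16.x, 32]
```

Each wait is a power of two plus random jitter, capped at 32 seconds, after which the loop gives up.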
Loop over each band, load that band using the above function, and then concatenate the results together.
bands = [1, 2, 3, 4, 5, 6, 7, 9, 10, 11]
datasets = []
for band in bands:
    da = get_band_with_exponential_backoff(
        path=path, row=row, product_id=product_id, band=band
    )
    da = da.assign_coords(band=[band])
    datasets.append(da)

ds = xr.concat(datasets, "band", compat="identical")
ds
<xarray.DataArray (band: 10, y: 7871, x: 7741)> dask.array<concatenate, shape=(10, 7871, 7741), dtype=uint16, chunksize=(1, 256, 256), chunktype=numpy.ndarray> Coordinates: * band (band) int64 1 2 3 4 5 6 7 9 10 11 * y (y) float64 4.582e+06 4.582e+06 4.582e+06 ... 4.346e+06 4.346e+06 * x (x) float64 3.954e+05 3.954e+05 3.955e+05 ... 6.276e+05 6.276e+05 Attributes: transform: (30.0, 0.0, 395385.0, 0.0, -30.0, 4582215.0) crs: +init=epsg:32618 res: (30.0, 30.0) is_tiled: 1 nodatavals: (nan,) scales: (1.0,) offsets: (0.0,) AREA_OR_POINT: Point
The CRS is EPSG:32618 (UTM Zone 18N).
import ast

def load_google_landsat_metadata(path, row, product_id):
    """
    Load Landsat metadata for path, row, product_id from Google Cloud Storage
    """

    def parse_type(x):
        # Parse numbers and quoted strings; fall back to the raw string
        try:
            return ast.literal_eval(x)
        except (ValueError, SyntaxError):
            return x

    baseurl = "https://storage.googleapis.com/gcp-public-data-landsat/LC08/01"
    suffix = f"{path:03d}/{row:03d}/{product_id}/{product_id}_MTL.txt"

    df = intake.open_csv(
        urlpath=f"{baseurl}/{suffix}",
        csv_kwargs={
            "sep": "=",
            "header": None,
            "names": ["index", "value"],
            "skiprows": 2,
            "converters": {"index": (lambda x: x.strip()), "value": parse_type},
        },
    ).read()

    metadata = df.set_index("index")["value"]
    return metadata
metadata = load_google_landsat_metadata(path, row, product_id)
metadata.head(n=10)
index
ORIGIN                         Image courtesy of the U.S. Geological Survey
REQUEST_ID                     0501702206266_00020
LANDSAT_SCENE_ID               LC80140322016209LGN01
LANDSAT_PRODUCT_ID             LC08_L1TP_014032_20160727_20170222_01_T1
COLLECTION_NUMBER              01
FILE_DATE                      2017-02-22T04:00:05Z
STATION_ID                     LGN
PROCESSING_SOFTWARE_VERSION    LPGS_2.7.0
END_GROUP                      METADATA_FILE_INFO
GROUP                          PRODUCT_METADATA
Name: value, dtype: object
We can trim the data to the Philadelphia region using the `slice()` function discussed last lecture.

# Load the GeoJSON from the URL
url = "http://data.phl.opendata.arcgis.com/datasets/405ec3da942d4e20869d4e1449a2be48_0.geojson"
city_limits = gpd.read_file(url)
# Convert to the right CRS for this data
city_limits = city_limits.to_crs(epsg=32618)
The `y` coordinates are in descending order, so your slice will need to be in descending order as well.

# Look up the order of xmin/xmax and ymin/ymax
city_limits.total_bounds?
Type: property String form: <property object at 0x137d144a0> Docstring: Returns a tuple containing ``minx``, ``miny``, ``maxx``, ``maxy`` values for the bounds of the series as a whole. See ``GeoSeries.bounds`` for the bounds of the geometries contained in the series. Examples -------- >>> from shapely.geometry import Point, Polygon, LineString >>> d = {'geometry': [Point(3, -1), Polygon([(0, 0), (1, 1), (1, 0)]), ... LineString([(0, 1), (1, 2)])]} >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326") >>> gdf.total_bounds array([ 0., -1., 3., 2.])
# Get x/y range of city limits from "total_bounds"
xmin, ymin, xmax, ymax = city_limits.total_bounds
# Slice our xarray data
subset = ds.sel(
x=slice(xmin, xmax),
y=slice(ymax, ymin)
) # NOTE: y coordinate system is in descending order!
subset = subset.compute()
subset
<xarray.DataArray (band: 10, y: 1000, x: 924)> array([[[10702, 10162, 10361, ..., 11681, 11489, 15594], [10870, 10376, 10122, ..., 11620, 11477, 12042], [10429, 10147, 10063, ..., 11455, 11876, 11790], ..., [11944, 11696, 11626, ..., 11192, 11404, 10923], [12406, 11555, 11155, ..., 11516, 11959, 10766], [11701, 11232, 10657, ..., 10515, 11475, 10926]], [[ 9809, 9147, 9390, ..., 10848, 10702, 15408], [ 9989, 9405, 9139, ..., 10831, 10660, 11361], [ 9439, 9083, 8981, ..., 10654, 11141, 11073], ..., [11220, 10853, 10741, ..., 10318, 10511, 9950], [11765, 10743, 10259, ..., 10646, 11378, 9829], [10889, 10365, 9630, ..., 9500, 10573, 10008]], [[ 9042, 8466, 8889, ..., 10014, 9647, 14981], [ 9699, 8714, 8596, ..., 9866, 9783, 11186], [ 8623, 8457, 8334, ..., 9688, 10474, 9993], ..., ... ..., [ 5027, 5028, 5038, ..., 5035, 5037, 5029], [ 5058, 5021, 5023, ..., 5035, 5041, 5031], [ 5036, 5041, 5040, ..., 5036, 5044, 5035]], [[29033, 28976, 28913, ..., 32614, 32577, 32501], [28940, 28904, 28858, ..., 32516, 32545, 32545], [28882, 28879, 28854, ..., 32431, 32527, 32563], ..., [30094, 29929, 29713, ..., 29521, 29525, 29429], [30140, 29972, 29696, ..., 29556, 29516, 29398], [30133, 29960, 29614, ..., 29587, 29533, 29424]], [[25558, 25519, 25492, ..., 27680, 27650, 27619], [25503, 25450, 25402, ..., 27636, 27630, 27639], [25473, 25434, 25378, ..., 27609, 27668, 27667], ..., [26126, 26055, 25934, ..., 25622, 25586, 25520], [26149, 26077, 25935, ..., 25651, 25594, 25507], [26147, 26050, 25856, ..., 25696, 25644, 25571]]], dtype=uint16) Coordinates: * band (band) int64 1 2 3 4 5 6 7 9 10 11 * y (y) float64 4.443e+06 4.443e+06 4.443e+06 ... 4.413e+06 4.413e+06 * x (x) float64 4.761e+05 4.761e+05 4.761e+05 ... 5.037e+05 5.038e+05 Attributes: transform: (30.0, 0.0, 395385.0, 0.0, -30.0, 4582215.0) crs: +init=epsg:32618 res: (30.0, 30.0) is_tiled: 1 nodatavals: (nan,) scales: (1.0,) offsets: (0.0,) AREA_OR_POINT: Point
# Original data was about 8000 x 8000 in size
ds.shape
(10, 7871, 7741)
# Sliced data is only about 1000 x 1000 in size!
subset.shape
(10, 1000, 924)
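The descending-`y` caveat above can be seen on a toy DataArray with a descending coordinate (made-up values, not the Landsat data): a slice written in ascending order selects nothing.

```python
import numpy as np
import xarray as xr

# A descending y coordinate, like the Landsat data
da = xr.DataArray(np.arange(5), coords={"y": [40, 30, 20, 10, 0]}, dims="y")

print(da.sel(y=slice(10, 30)).size)  # 0: ascending slice selects nothing
print(da.sel(y=slice(30, 10)).size)  # 3: slice order must match the coords
```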
# City limits plot
limits_plot = city_limits.hvplot.polygons(
line_color="white", alpha=0, line_alpha=1, crs=32618, geo=True
)
# Landsat plot
landsat_plot = subset.sel(band=3).hvplot.image(
x="x",
y="y",
rasterize=True,
cmap="viridis",
frame_width=500,
frame_height=500,
geo=True,
crs=32618,
)
# Combined
landsat_plot * limits_plot
Remember, NDVI = (NIR - Red) / (NIR + Red)
band_info.head()
| Band | Name | Wavelength Range (µm) | Nominal Wavelength (µm) | Resolution (m) | Description |
|---|---|---|---|---|---|
| 1 | Aerosol | 0.43 - 0.45 | 0.440 | 30 | Coastal aerosol |
| 2 | Blue | 0.45 - 0.51 | 0.480 | 30 | Blue |
| 3 | Green | 0.53 - 0.59 | 0.560 | 30 | Green |
| 4 | Red | 0.64 - 0.67 | 0.655 | 30 | Red |
| 5 | NIR | 0.85 - 0.88 | 0.865 | 30 | Near Infrared (NIR) |
NIR is band 5 and Red is band 4
# Select the bands we need
NIR = subset.sel(band=5)
RED = subset.sel(band=4)
# Calculate the NDVI
NDVI = (NIR - RED) / (NIR + RED)
NDVI = NDVI.where(NDVI < np.inf)
NDVI.head()
<xarray.DataArray (y: 5, x: 5)> array([[0.34690712, 0.43934922, 0.45820226, 0.45798014, 0.38205128], [0.33747045, 0.40266976, 0.47276229, 0.44722264, 0.4530777 ], [0.40783302, 0.44093268, 0.47419128, 0.49448025, 0.49341935], [0.40190311, 0.39986498, 0.46889397, 0.48497283, 0.48686142], [0.47248631, 0.47255625, 0.46365524, 0.37692424, 0.36055777]]) Coordinates: * y (y) float64 4.443e+06 4.443e+06 4.443e+06 4.443e+06 4.443e+06 * x (x) float64 4.761e+05 4.761e+05 4.761e+05 4.762e+05 4.762e+05
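Why the mask against `np.inf`? Where a pixel is nodata (all zeros), the division produces NaN, and `NDVI < np.inf` is False for both NaN and infinity, so those pixels get masked out. A toy example with made-up reflectance values (not the Landsat pixels):

```python
import numpy as np

nir = np.array([5000.0, 4000.0, 0.0])  # last pixel is nodata (all zeros)
red = np.array([1000.0, 3000.0, 0.0])

with np.errstate(invalid="ignore"):
    ndvi = (nir - red) / (nir + red)   # nodata pixel becomes nan (0/0)

# keep only values where ndvi < inf; nan < inf is False, so nan stays masked
masked = np.where(ndvi < np.inf, ndvi, np.nan)
print(masked)
```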
# NEW: Use datashader to plot the NDVI
p = NDVI.hvplot.image(
x="x",
y="y",
geo=True,
crs=32618,
datashade=True, # NEW: USE DATASHADER
project=True, # NEW: PROJECT THE DATA BEFORE PLOTTING
frame_height=500,
frame_width=500,
cmap="viridis",
)
# NEW: Use a transparent rasterized version of the plot for hover text
# No hover text available with datashaded images so we can use the rasterized version
p_hover = NDVI.hvplot(
x="x",
y="y",
crs=32618,
geo=True,
rasterize=True,
frame_height=500,
frame_width=500,
alpha=0, # SET ALPHA=0 TO MAKE THIS TRANSPARENT
colorbar=False,
)
# City limits
limits_plot = city_limits.hvplot.polygons(
geo=True, crs=32618, line_color="white", line_width=2, alpha=0, line_alpha=1
)
p * p_hover * limits_plot
Note the use of the `datashade=True` keyword above.

from rio_toa import brightness_temp, toa_utils
def land_surface_temp(BT, w, NDVI):
    """Calculate the land surface temperature of Landsat 8.

    temp = BT / (1 + w * (BT / p) * ln(e))

    BT = at-satellite brightness temperature
    w = wavelength of emitted radiance (µm)
    p = h * c / s (1.439e-2 m K)
    h = Planck's constant (J s)
    s = Boltzmann constant (J/K)
    c = velocity of light (m/s)
    """
    h = 6.626e-34
    s = 1.38e-23
    c = 2.998e8
    p = (h * c / s) * 1e6

    # Fraction of vegetation, from the min/max-normalized NDVI
    Pv = ((NDVI - NDVI.min()) / (NDVI.max() - NDVI.min())) ** 2

    # Emissivity
    e = 0.004 * Pv + 0.986

    return BT / (1 + w * (BT / p) * np.log(e))
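As a quick check of the constant quoted in the docstring, `p = h * c / s` does come out to about 1.439e-2 m·K (about 14,390 µm·K after the `1e6` conversion, matching wavelengths given in µm):

```python
h = 6.626e-34  # Planck's constant (J s)
s = 1.38e-23   # Boltzmann constant (J/K)
c = 2.998e8    # velocity of light (m/s)

p_mK = h * c / s       # in m K
p_umK = p_mK * 1e6     # in µm K, the units used in the function above
print(round(p_mK, 5))
```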
def land_surface_temp_for_band(band):
    """
    Utility function to get the land surface temperature for a specific band
    """
    # params from general Landsat info
    w = band_info.loc[band]["Nominal Wavelength (µm)"]

    # params from the specific Landsat metadata text file for these images
    ML = metadata[f"RADIANCE_MULT_BAND_{band}"]
    AL = metadata[f"RADIANCE_ADD_BAND_{band}"]
    K1 = metadata[f"K1_CONSTANT_BAND_{band}"]
    K2 = metadata[f"K2_CONSTANT_BAND_{band}"]

    at_satellite_temp = brightness_temp.brightness_temp(
        subset.sel(band=band).values, ML, AL, K1, K2
    )

    return land_surface_temp(at_satellite_temp, w, NDVI)
# temperature for band 10
band = 10
temp_10 = land_surface_temp_for_band(band).compute()
# convert to Fahrenheit
temp_10_f = toa_utils.temp_rescale(temp_10, 'F')
temp_10_f
<xarray.DataArray (y: 1000, x: 924)> array([[82.90960333, 82.6719826 , 82.40894577, ..., 97.35595848, 97.21128509, 96.91376458], [82.52155948, 82.37134833, 82.17908169, ..., 96.97252484, 97.08602323, 97.08602622], [82.2792908 , 82.26686815, 82.16232996, ..., 96.63949101, 97.01559592, 97.1564358 ], ..., [87.2877158 , 86.61250194, 85.72552832, ..., 84.9339736 , 84.9504808 , 84.55381061], [87.47549589, 86.78863063, 85.65558184, ..., 85.07848945, 84.9133618 , 84.42561547], [87.44697798, 86.73954729, 85.31790267, ..., 85.20644222, 84.9835009 , 84.53307946]]) Coordinates: * y (y) float64 4.443e+06 4.443e+06 4.443e+06 ... 4.413e+06 4.413e+06 * x (x) float64 4.761e+05 4.761e+05 4.761e+05 ... 5.037e+05 5.038e+05
# Save the numpy array as an xarray
band_10 = subset.sel(band=band).copy(data=temp_10_f)
band_10
<xarray.DataArray (y: 1000, x: 924)> array([[82.90960333, 82.6719826 , 82.40894577, ..., 97.35595848, 97.21128509, 96.91376458], [82.52155948, 82.37134833, 82.17908169, ..., 96.97252484, 97.08602323, 97.08602622], [82.2792908 , 82.26686815, 82.16232996, ..., 96.63949101, 97.01559592, 97.1564358 ], ..., [87.2877158 , 86.61250194, 85.72552832, ..., 84.9339736 , 84.9504808 , 84.55381061], [87.47549589, 86.78863063, 85.65558184, ..., 85.07848945, 84.9133618 , 84.42561547], [87.44697798, 86.73954729, 85.31790267, ..., 85.20644222, 84.9835009 , 84.53307946]]) Coordinates: band int64 10 * y (y) float64 4.443e+06 4.443e+06 4.443e+06 ... 4.413e+06 4.413e+06 * x (x) float64 4.761e+05 4.761e+05 4.761e+05 ... 5.037e+05 5.038e+05 Attributes: transform: (30.0, 0.0, 395385.0, 0.0, -30.0, 4582215.0) crs: +init=epsg:32618 res: (30.0, 30.0) is_tiled: 1 nodatavals: (nan,) scales: (1.0,) offsets: (0.0,) AREA_OR_POINT: Point
# Get the land surface temp
band = 11
temp_11 = land_surface_temp_for_band(band).compute()
# Convert to Fahrenheit
temp_11_f = toa_utils.temp_rescale(temp_11, "F")
# Save as an xarray DataArray
band_11 = subset.sel(band=band).copy(data=temp_11_f)
# plot for band 10
plot_10 = band_10.hvplot.image(
x="x",
y="y",
rasterize=True,
cmap="fire_r",
crs=32618,
geo=True,
frame_height=400,
frame_width=400,
title="Band 10",
)
# plot for band 11
plot_11 = band_11.hvplot.image(
x="x",
y="y",
rasterize=True,
cmap="fire_r",
crs=32618,
geo=True,
frame_height=400,
frame_width=400,
title="Band 11",
)
plot_10 + plot_11
# Let's average the results of these two bands
mean_temp_f = (band_10 + band_11) / 2
# IMPORTANT: copy over the metadata
mean_temp_f.attrs = band_10.attrs
mean_temp_f
<xarray.DataArray (y: 1000, x: 924)> array([[79.40579086, 79.18871976, 78.98910218, ..., 91.86524855, 91.72038357, 91.49660753], [79.07306227, 78.86421586, 78.64685031, ..., 91.56712235, 91.60934815, 91.6311314 ], [78.87625297, 78.77157573, 78.57777687, ..., 91.33527213, 91.66605844, 91.73405961], ..., [83.01805262, 82.50340008, 81.75768196, ..., 80.579109 , 80.49671178, 80.13207064], [83.16916028, 82.64632139, 81.7252 , ..., 80.72431108, 80.4982923 , 80.03521661], [83.14992768, 82.55447768, 81.35868618, ..., 80.90148737, 80.65920451, 80.25022931]]) Coordinates: * y (y) float64 4.443e+06 4.443e+06 4.443e+06 ... 4.413e+06 4.413e+06 * x (x) float64 4.761e+05 4.761e+05 4.761e+05 ... 5.037e+05 5.038e+05 Attributes: transform: (30.0, 0.0, 395385.0, 0.0, -30.0, 4582215.0) crs: +init=epsg:32618 res: (30.0, 30.0) is_tiled: 1 nodatavals: (nan,) scales: (1.0,) offsets: (0.0,) AREA_OR_POINT: Point
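Copying the attrs matters because xarray arithmetic drops attributes by default. A minimal illustration with toy values (not the temperature data):

```python
import xarray as xr

a = xr.DataArray([80.0], attrs={"crs": "+init=epsg:32618"})
b = xr.DataArray([82.0], attrs={"crs": "+init=epsg:32618"})

mean = (a + b) / 2
print(mean.attrs)        # {}: arithmetic drops the metadata

mean.attrs = a.attrs     # copy it back over, as above
print(mean.attrs["crs"])
```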
p = mean_temp_f.hvplot.image(
x="x",
y="y",
rasterize=True,
crs=32618,
geo=True,
frame_width=600,
frame_height=600,
cmap="rainbow",
alpha=0.5,
legend=False,
title="Mean Surface Temp (F)"
)
img = p * EsriImagery
img
Key insight: the color of a building's roof plays a very important role in urban heat islands.
We can use the `.where()` function in xarray to identify the heat islands across the city. The `where()` function takes a boolean array, where each pixel is True/False based on the selection criteria.

hot_spots = mean_temp_f.where(mean_temp_f > 90)
hot_spots_plot = hot_spots.hvplot.image(
x="x",
y="y",
cmap="fire",
crs=32618,
geo=True,
frame_height=500,
frame_width=500,
colorbar=False,
legend=False,
rasterize=True,
title="Mean Temp (F) > 90",
alpha=0.7
)
hot_spots_plot * EsriImagery
Let's select things less than 75 degrees as well...
cold_spots = mean_temp_f.where(mean_temp_f < 75)
cold_spots_plot = cold_spots.hvplot.image(
x="x",
y="y",
cmap="fire",
geo=True,
crs=32618,
frame_height=500,
frame_width=500,
colorbar=False,
legend=False,
rasterize=True,
title="Mean Temp (F) < 75",
alpha=0.5
)
cold_spots_plot * EsriImagery
In this section, we'll use zonal statistics to calculate the average temperatures for redlined areas and calculate the number of street trees in the city.
First, save the mean temperature data to a `.tif` file:

import rioxarray
# Save to a raster GeoTIFF file!
mean_temp_f.rio.to_raster("./mean_temp_f.tif")
Load the redlining GeoJSON file from the `data/` folder.

Hint: add a new column called `holc_color` in the redlining GeoDataFrame so `hvplot()` can color the polygons; you can use the `.apply()` function of the `holc_grade` column.

color_palette = {
"A": "#388E3C",
"B": "#1976D2",
"C": "#FFEE58",
"D": "#D32F2F",
"Commercial": "#a1a1a1",
}
redlining = gpd.read_file("./data/redlining.geojson")
redlining.head()
| | area_id | holc_grade | geometry |
|---|---|---|---|
| 0 | 0 | A | POLYGON ((-75.17329 40.06201, -75.17572 40.060... |
| 1 | 1 | A | MULTIPOLYGON (((-75.19831 40.05021, -75.19899 ... |
| 2 | 2 | A | POLYGON ((-75.12810 40.04734, -75.12844 40.045... |
| 3 | 3 | A | POLYGON ((-75.11494 40.03443, -75.13476 40.036... |
| 4 | 4 | A | POLYGON ((-75.18377 40.02725, -75.17894 40.023... |
# OPTION 1
colors = redlining['holc_grade'].replace(color_palette)
# OPTION 2
def get_color(grade):
    return color_palette[grade]

colors = redlining["holc_grade"].apply(get_color)
colors.head()
0    #388E3C
1    #388E3C
2    #388E3C
3    #388E3C
4    #388E3C
Name: holc_grade, dtype: object
redlining["holc_color"] = colors
holc_map = redlining.hvplot.polygons(
c="holc_color",
frame_width=400,
frame_height=400,
geo=True,
hover_cols=["holc_grade"],
alpha=0.5
)
holc_map * gv.tile_sources.ESRI
Use `rasterio.open()` to load the mean temp TIFF file, and the `rasterstats` package to calculate the zonal stats.

from rasterstats import zonal_stats
import rasterio as rio
# Open the GeoTIFF file using rasterio
f = rio.open('./mean_temp_f.tif')
f
<open DatasetReader name='./mean_temp_f.tif' mode='r'>
Load the data from the file:
temp_data = f.read(1)
temp_data
array([[79.40579086, 79.18871976, 78.98910218, ..., 91.86524855, 91.72038357, 91.49660753], [79.07306227, 78.86421586, 78.64685031, ..., 91.56712235, 91.60934815, 91.6311314 ], [78.87625297, 78.77157573, 78.57777687, ..., 91.33527213, 91.66605844, 91.73405961], ..., [83.01805262, 82.50340008, 81.75768196, ..., 80.579109 , 80.49671178, 80.13207064], [83.16916028, 82.64632139, 81.7252 , ..., 80.72431108, 80.4982923 , 80.03521661], [83.14992768, 82.55447768, 81.35868618, ..., 80.90148737, 80.65920451, 80.25022931]])
Same as:
mean_temp_f.values
array([[79.40579086, 79.18871976, 78.98910218, ..., 91.86524855, 91.72038357, 91.49660753], [79.07306227, 78.86421586, 78.64685031, ..., 91.56712235, 91.60934815, 91.6311314 ], [78.87625297, 78.77157573, 78.57777687, ..., 91.33527213, 91.66605844, 91.73405961], ..., [83.01805262, 82.50340008, 81.75768196, ..., 80.579109 , 80.49671178, 80.13207064], [83.16916028, 82.64632139, 81.7252 , ..., 80.72431108, 80.4982923 , 80.03521661], [83.14992768, 82.55447768, 81.35868618, ..., 80.90148737, 80.65920451, 80.25022931]])
f.crs.data
{'init': 'epsg:32618'}
# Get the CRS
CRS = f.crs.data
print(CRS)
{'init': 'epsg:32618'}
# Get the transform too
transform = f.transform
# Use zonal_stats to get the mean temperature values within each redlined area
# FIRST ARGUMENT: vector polygons
# SECOND ARGUMENT: mean temperature raster data
redlining_stats = zonal_stats(
redlining.to_crs(CRS["init"]), mean_temp_f.values, affine=transform, nodata=np.nan
)
# Converts stats to a dataframe
redlining_stats = pd.DataFrame(redlining_stats)
redlining_stats.head()
| | min | max | mean | count |
|---|---|---|---|---|
| 0 | 76.009914 | 85.732779 | 81.403242 | 1731 |
| 1 | 73.232288 | 88.181473 | 77.453338 | 7074 |
| 2 | 78.858240 | 88.117261 | 82.955090 | 1639 |
| 3 | 76.950025 | 95.523473 | 87.345773 | 2150 |
| 4 | 74.434519 | 89.862934 | 79.951873 | 1717 |
### ALTERNATIVE
### in this case, you don't need to specify the transform,
### it's loaded from the TIF file
redlining_stats = zonal_stats(redlining.to_crs(CRS["init"]), "./mean_temp_f.tif")
# Converts stats to a dataframe
redlining_stats = pd.DataFrame(redlining_stats)
# Store the mean temp back in the original redlining dataframe
redlining["mean_temp"] = redlining_stats['mean']
redlining.head()
| | area_id | holc_grade | geometry | holc_color | mean_temp |
|---|---|---|---|---|---|
| 0 | 0 | A | POLYGON ((-75.17329 40.06201, -75.17572 40.060... | #388E3C | 81.403242 |
| 1 | 1 | A | MULTIPOLYGON (((-75.19831 40.05021, -75.19899 ... | #388E3C | 77.453338 |
| 2 | 2 | A | POLYGON ((-75.12810 40.04734, -75.12844 40.045... | #388E3C | 82.955090 |
| 3 | 3 | A | POLYGON ((-75.11494 40.03443, -75.13476 40.036... | #388E3C | 87.345773 |
| 4 | 4 | A | POLYGON ((-75.18377 40.02725, -75.17894 40.023... | #388E3C | 79.951873 |
Hint: use the `np.nanmean()` function to calculate the mean of the temperature raster data, automatically ignoring any NaN pixels.

mean_temp_f.values
array([[79.40579086, 79.18871976, 78.98910218, ..., 91.86524855, 91.72038357, 91.49660753], [79.07306227, 78.86421586, 78.64685031, ..., 91.56712235, 91.60934815, 91.6311314 ], [78.87625297, 78.77157573, 78.57777687, ..., 91.33527213, 91.66605844, 91.73405961], ..., [83.01805262, 82.50340008, 81.75768196, ..., 80.579109 , 80.49671178, 80.13207064], [83.16916028, 82.64632139, 81.7252 , ..., 80.72431108, 80.4982923 , 80.03521661], [83.14992768, 82.55447768, 81.35868618, ..., 80.90148737, 80.65920451, 80.25022931]])
# Nan values cause this to not be defined!
mean_temp_f.values.mean()
nan
citywide_mean_temp = np.nanmean(mean_temp_f.values) # use nanmean() to skip NaN pixels automatically!
redlining['mean_temp_normalized'] = redlining['mean_temp'] - citywide_mean_temp
citywide_mean_temp
81.70253802309126
choro = redlining.hvplot(
c="mean_temp_normalized",
frame_width=400,
frame_height=400,
geo=True,
hover_cols=["holc_grade", 'mean_temp'],
alpha=0.5,
cmap='coolwarm',
)
choro * gv.tile_sources.ESRI + holc_map * gv.tile_sources.ESRI
Use a groupby / mean operation to calculate the mean temperature for each HOLC grade, e.g., A, B, C, etc.

You should find that better HOLC grades correspond to lower mean temperatures.
redlining.groupby("holc_grade")["mean_temp"].mean().sort_values()
holc_grade
A             82.364416
B             84.636335
C             86.298266
D             86.305663
Commercial    86.755549
Name: mean_temp, dtype: float64
Load the tree canopy outlines from the `data/` folder.

Note: these are the outlines of the tree canopy; they are polygon geometries.
canopy = gpd.read_file("data/ppr_tree_canopy_outlines_2015/")
canopy.head()
| | avg_height | geometry |
|---|---|---|
| 0 | 6 | POLYGON ((-75.27506 39.97745, -75.27505 39.977... |
| 1 | 17 | POLYGON ((-75.26884 39.97958, -75.26884 39.979... |
| 2 | 32 | POLYGON ((-75.26510 39.96519, -75.26512 39.965... |
| 3 | 15 | POLYGON ((-75.27271 39.97838, -75.27271 39.978... |
| 4 | 32 | POLYGON ((-75.26491 39.96518, -75.26497 39.965... |
Use the `geopandas.overlay` function (see the documentation) to achieve this.

Important: make sure both dataframes are in the same projected CRS first!
canopy = canopy.to_crs(epsg=3857)
redlining = redlining.to_crs(epsg=3857)
canopy_intersection = gpd.overlay(redlining, canopy, how='intersection')
For both the redlining dataframe and the canopy intersection dataframe, do the following:

- Compute the area of each polygon (using the `geometry.area` attribute)
- Group by the `holc_grade` column and sum up the area

This will give you the total area of each HOLC grade and the total area of the tree canopy in each HOLC grade.
canopy_intersection['area'] = canopy_intersection.geometry.area
canopy_area = canopy_intersection.groupby("holc_grade").area.sum()
redlining['area'] = redlining.geometry.area
redlining_area = redlining.groupby("holc_grade").area.sum()
Divide the results from the previous step to calculate the percent of the land area covered by trees for each HOLC grade
You should find that better HOLC grades correspond to higher tree coverage.
tree_coverage = canopy_area / redlining_area
tree_coverage.sort_values()
holc_grade
Commercial    0.020991
D             0.029206
C             0.039274
B             0.047298
A             0.094690
Name: area, dtype: float64