Object-based colocalization analysis

Contents

Object-based colocalization analysis#

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "locan",
#     "matplotlib",
#     "ndv[jupyter,vispy]",
#     "numpy",
#     "scikit-image",
#     "scipy",
#     "shapely",
#     "tifffile",
# ]
# ///

Description#

In this Notebook, we learn about point-based colocalization analysis

Objectives#

Learn about mean nearest neighbor, the nearest neighbor function, and Ripley’s K function.
Learn Monte Carlo based validation.

Table of Contents#

  1. Load the data

  2. Plot the data

  3. Mean nearest neighbor distance

  4. Nearest neighbor function

  5. Ripley’s K - with and without boundary correction

  6. Validation - the null distribution

  7. Validation - Monte Carlo based null hypothesis testing

In this exercise, we will use the Object Based Colocalization Dataset .

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from rich import print
from scipy.spatial import distance_matrix
from shapely.geometry import Point, box

0. Load the data#

base = Path("../../_static/images/coloc/obj_based/")

fall_med_iac = np.loadtxt(base / "p_400_c1.csv", delimiter=",")
fall_med_bob = np.loadtxt(base / "p_400_c2.csv", delimiter=",")
fall_cold_iac = np.loadtxt(base / "g_p_400_dual_1.csv", delimiter=",")
fall_cold_bob = np.loadtxt(base / "g_p_400_dual_2.csv", delimiter=",")
fall_warm_iac = np.loadtxt(base / "p_400_c1_rep.csv", delimiter=",")
fall_warm_bob = np.loadtxt(base / "p_400_c2_rep.csv", delimiter=",")
winter_warm_iac = np.loadtxt(base / "g_p_400_c1.csv", delimiter=",")
winter_warm_bob = np.loadtxt(base / "g_p_400_c2.csv", delimiter=",")
winter_medium_iac = np.loadtxt(base / "g_p_as_c1.csv", delimiter=",")
winter_medium_bob = np.loadtxt(base / "g_p_as_c2.csv", delimiter=",")
winter_cold_iac = np.loadtxt(base / "g_p_400_c1_cor.csv", delimiter=",")
winter_cold_bob = np.loadtxt(base / "g_p_400_c2_cor.csv", delimiter=",")

1. Plot the data#

def plotloc_2c(
    points_iac: np.ndarray,
    points_bob: np.ndarray,
    title: str,
    s: float = 15,
    ch1: str = "IAC",
    ch2: str = "BOB",
) -> None:
    """
    Plot two channels in a 2D space.
    Parameters:
    points_iac (np.ndarray): Points for channel 1.
    points_bob (np.ndarray): Points for channel 2.
    """
    plt.scatter(points_iac[:, 0], points_iac[:, 1], c="blue", label=ch1, s=s, alpha=1)
    plt.scatter(
        points_bob[:, 0],
        points_bob[:, 1],
        c="magenta",
        label=ch2,
        s=s,
        alpha=0.75,
        marker="s",
    )
    plt.title(label=title)
    plt.legend()

    plt.xlabel("x [au]")
    plt.ylabel("y [au]")
    ax = plt.gca()
    ax.set_aspect("equal", adjustable="box")
print(fall_med_iac.shape, fall_med_bob.shape)
plotloc_2c(fall_med_iac, fall_med_bob, title="Fall, medium")
(400, 2)
(400, 2)
../../_images/e9df1815386ca4bb02c502e237278c1d02d40f484e6afdacdcdaeb9c90912a7b.png
print(fall_cold_iac.shape, fall_cold_bob.shape)
plotloc_2c(fall_cold_iac, fall_cold_bob, title="Fall, cold")
(400, 2)
(400, 2)
../../_images/7433031ccfa9de0091426cdff4c527a75ebdaf74a2bfc2b6d6c19615cad5bdce.png
print(fall_warm_iac.shape, fall_warm_bob.shape)
plotloc_2c(fall_warm_iac, fall_warm_bob, title="Fall, warm")
(400, 2)
(400, 2)
../../_images/8c9f43cc3ce6cc4b710ee82537c9f28e6f7b971d89c26c98bfb9354a2cd91f6f.png
print(winter_cold_iac.shape, winter_cold_bob.shape)
plotloc_2c(winter_cold_iac, winter_cold_bob, title="Winter, cold")
(400, 2)
(400, 2)
../../_images/d6165a462f151c88b566f43cf1e98c4731fa7c28587e8ab91da4fff51794eef0.png
print(winter_medium_iac.shape, winter_medium_bob.shape)
plotloc_2c(winter_medium_iac, winter_medium_bob, title="Winter, medium")
(400, 2)
(200, 2)
../../_images/ff6bd7b4254e99d9da00e6a70e75d1126b5e2b599d143648ad59bfb961d10426.png
print(winter_warm_iac.shape, winter_warm_bob.shape)
plotloc_2c(winter_warm_iac, winter_warm_bob, title="Winter, warm")
(400, 2)
(400, 2)
../../_images/40c6814f67e28f443f6ebc40526edc5af6cf2de80939cdbf0a10ac43a2df09aa.png

2. Mean nearest neighbor distance#

✍️ Exercise: Code along#

def meanNN(points1: np.ndarray, points2: np.ndarray) -> tuple[np.ndarray, float]:
    """
    Computes the mean nearest neighbor distances for a set of 2D points.
    Parameters:
    - points1: array of shape (n_points, 2) representing the coordinates of the first set of points
    - points2: array of shape (n_points, 2) representing the coordinates of the second set of points; if None, uses points1

    Returns:
    - min_dists: array of minimum distances of each point in points1 to its nearest neighbor in points2
    - mean_nn: mean of the minimum distances
    """

    d12 = distance_matrix(points1, points2)

    if points1 is points2 or np.shares_memory(points1, points2):
        np.fill_diagonal(d12, np.inf)

    min_dists = np.min(d12, axis=1)
    mean_nn = np.mean(min_dists)

    return min_dists, mean_nn

Compute Fall samples#

nndist_fall_med, meannn_fall_med = meanNN(fall_med_iac, fall_med_bob)
nndist_fall_cold, meannn_fall_cold = meanNN(fall_cold_iac, fall_cold_bob)
nndist_fall_warm, meannn_fall_warm = meanNN(fall_warm_iac, fall_warm_bob)


# nndist_fall_med, meannn_fall_med = meanNN(fall_med_bob, fall_med_iac, )
# nndist_fall_cold, meannn_fall_cold = meanNN(fall_cold_bob, fall_cold_iac)
# nndist_fall_warm, meannn_fall_warm = meanNN(fall_warm_bob, fall_warm_iac)

Plot Fall samples#

plt.hist(nndist_fall_med, label="Fall medium", alpha=0.5, bins=25)
plt.hist(nndist_fall_cold, label="Fall cold", alpha=0.5, bins=25)
plt.hist(nndist_fall_warm, label="Fall warm", alpha=0.5, bins=25)
plt.xlabel("distance to nearest neighbor [a.u.]")
plt.axvline(
    meannn_fall_med,
    c="k",
    linestyle="--",
    label=f"Fall medium: {meannn_fall_med.round(2)}",
)
plt.axvline(
    meannn_fall_cold,
    c="k",
    linestyle=":",
    label=f"Fall cold: {meannn_fall_cold.round(2)}",
)
plt.axvline(
    meannn_fall_warm,
    c="k",
    linestyle="-",
    label=f"Fall warm: {meannn_fall_warm.round(2)}",
)
plt.legend()
<matplotlib.legend.Legend at 0x7f3a5f7ef470>
../../_images/2b6fa59584ebcdff1ca1be193854acd09511d61b00eab70b3ea0078c5bc80827.png

Compute Winter samples#

nndist_winter_warm, meannn_winter_warm = meanNN(winter_warm_iac, winter_warm_bob)
nndist_winter_medium, meannn_winter_medium = meanNN(
    winter_medium_iac, winter_medium_bob
)
nndist_winter_cold, meannn_winter_cold = meanNN(winter_cold_iac, winter_cold_bob)

Plot Winter samples#

plt.hist(nndist_winter_cold, label="Winter cold", alpha=0.5, bins=25)
plt.hist(nndist_winter_medium, label="Winter medium", alpha=0.5, bins=25)
plt.hist(nndist_winter_warm, label="Winter warm", alpha=0.5, bins=25)

plt.xlabel("distance to nearest neighbor [a.u.]")
plt.title(" IAC -> BOB ")
plt.axvline(
    meannn_winter_cold,
    c="k",
    linestyle=":",
    label=f"Winter cold: {meannn_winter_cold.round(2)}",
)
plt.axvline(
    meannn_winter_medium,
    c="k",
    linestyle="--",
    label=f"Winter medium: {meannn_winter_medium.round(2)}",
)
plt.axvline(
    meannn_winter_warm,
    c="k",
    linestyle="-",
    label=f"Winter warm: {meannn_winter_warm.round(2)}",
)
plt.legend()
<matplotlib.legend.Legend at 0x7f3a5f61b050>
../../_images/165de865ed6f736c431eae57f215b788ce41a7c355178d2063165d23b2988657.png

✍️ Exercise: Change the order of points provided to meanNN() for the winter samples#

E.g. switch the order in which winter_warm_iac and winter_warm_bob is passed to meanNN().
Do the same for winter_cold_iac and winter_cold_bob, as well as winter_medium_iac and winter_medium_bob.

nndist_winter_warm_2, meannn_winter_warm_2 = ..., ...
nndist_winter_medium_2, meannn_winter_medium_2 = ..., ...
nndist_winter_cold_2, meannn_winter_cold_2 = ..., ...
nndist_winter_warm_2, meannn_winter_warm_2 = meanNN(winter_warm_bob, winter_warm_iac)
nndist_winter_medium_2, meannn_winter_medium_2 = meanNN(
    winter_medium_bob, winter_medium_iac
)
nndist_winter_cold_2, meannn_winter_cold_2 = meanNN(winter_cold_bob, winter_cold_iac)

Plot the results#

plt.hist(nndist_winter_cold_2, label="Winter cold", alpha=0.5, bins=25)
plt.hist(nndist_winter_medium_2, label="Winter medium", alpha=0.5, bins=25)
plt.hist(nndist_winter_warm_2, label="Winter warm", alpha=0.5, bins=25)

plt.title("BOB -> IAC")
plt.xlabel("distance to nearest neighbor [a.u.]")
plt.axvline(
    meannn_winter_cold_2,
    c="k",
    linestyle=":",
    label=f"Winter cold: {meannn_winter_cold_2.round(2)}",
)
plt.axvline(
    meannn_winter_medium_2,
    c="k",
    linestyle="--",
    label=f"Winter medium: {meannn_winter_medium_2.round(2)}",
)
plt.axvline(
    meannn_winter_warm_2,
    c="k",
    linestyle="-",
    label=f"Winter warm: {meannn_winter_warm_2.round(2)}",
)
plt.legend()
<matplotlib.legend.Legend at 0x7f3a5f6df110>
../../_images/b77ba446e29909335c66e64fadae5cbf32703fb4d17fd8d44fd1944a483c563f.png

3. Nearest neighbor function#

def nearest_neighbor_function(
    points1: np.ndarray, points2: np.ndarray, radii: np.ndarray
) -> np.ndarray:
    """
    Computes the nearest neighbor function for a set of 2D points.

    Parameters:
    - points1: array of shape (n_points, 2) representing the coordinates of the first set of points
    - points2: array of shape (n_points, 2) representing the coordinates of the second set of points
    - radii: array-like of radii at which to evaluate the nearest neighbor function
    Returns:
    - S: array of nearest neighbor function values at each radius
    - mu0: array of expected values under the null model at each radius
    """

    # calculate area based on bounding box of all points
    allpoints = np.vstack((points1, points2))
    max_x, max_y = np.max(allpoints, axis=0)
    min_x, min_y = np.min(allpoints, axis=0)
    (max_x - min_x) * (max_y - min_y)

    n1 = len(points1)  # number of points in the first set
    d12 = distance_matrix(points1, points2)

    if points1 is points2 or np.shares_memory(points1, points2):
        np.fill_diagonal(
            d12, np.inf
        )  # Set diagonal to infinity to ignore self-distances

    min_dists = np.min(d12, axis=1)  # nearest neighbor distances per point

    S = np.empty(radii.shape, dtype=float)

    for i, r in enumerate(radii):
        within_r = min_dists < r
        n_within_r = len(min_dists[within_r])
        S[i] = n_within_r / n1

    return S

Hide code cell content

def plot_nn_function(
    s: np.ndarray,
    radii: np.ndarray,
    nulldist: np.ndarray = None,
    show: bool = True,
    label: str = "observed",
    line: str = "-",
) -> None:
    """Plots the nearest neighbor function S(r) with optional null model mean.
    Parameters:
    - s: array of S(r) values
    - radii: array of radii at which S(r) is evaluated
    - nulldist: optional array of null model mean values; if provided, plotted in red
    - show: if True, displays the plot; if False, only prepares the plot
    - label: label for the S(r) line in the plot
    - line: line style for the S(r) plot; default is solid line
    """
    if nulldist is not None:
        plt.plot(radii, nulldist, c="red", alpha=0.5, label="null‐model mean")

    plt.plot(radii, s, c="black", alpha=0.8, label=f"{label}", linestyle=line)

    plt.legend()
    plt.xlabel("r")
    plt.ylabel("S(r)")
    plt.tight_layout()
    if show:
        plt.show()

Define radii for which to evaluate the nearest neighbor function#

radii = np.arange(0, 3, 0.25)

Inspect the fall samples#

s_fall_med = nearest_neighbor_function(fall_med_iac, fall_med_bob, radii)
s_fall_warm = nearest_neighbor_function(fall_warm_iac, fall_warm_bob, radii)
s_fall_cold = nearest_neighbor_function(fall_cold_iac, fall_cold_bob, radii)
plot_nn_function(s_fall_cold, radii, show=False, label="Fall cold", line=":")
plot_nn_function(s_fall_med, radii, show=False, label="Fall medium", line="--")
plot_nn_function(s_fall_warm, radii, show=False, label="Fall warm", line="-")
../../_images/b4c328d0c9f7b70799bfb558f3364f94f23f02edc465956fb3d2ae69e6a853bd.png

Inspect the winter samples#

s_winter_warm = nearest_neighbor_function(winter_warm_iac, winter_warm_bob, radii)
s_winter_medium = nearest_neighbor_function(winter_medium_iac, winter_medium_bob, radii)
s_winter_cold = nearest_neighbor_function(winter_cold_iac, winter_cold_bob, radii)
s_winter_warm_2 = nearest_neighbor_function(winter_warm_bob, winter_warm_iac, radii)
s_winter_medium_2 = nearest_neighbor_function(
    winter_medium_bob, winter_medium_iac, radii
)
s_winter_cold_2 = nearest_neighbor_function(winter_cold_bob, winter_cold_iac, radii)
plot_nn_function(
    s_winter_cold, radii, show=False, label="Winter cold IAC -> BOB", line=":"
)
plot_nn_function(
    s_winter_medium, radii, show=False, label="Winter medium IAC -> BOB", line="--"
)
plot_nn_function(
    s_winter_warm, radii, show=False, label="Winter warm IAC -> BOB", line="-"
)
../../_images/598a295cc8142d1693886fb90548c9c771782d2db1773cfc7f628b9ad449dfdd.png
plot_nn_function(
    s_winter_cold_2, radii, show=False, label="Winter cold BOB -> IAC", line=":"
)
plot_nn_function(
    s_winter_medium_2, radii, show=False, label="Winter medium BOB -> IAC", line="--"
)
plot_nn_function(
    s_winter_warm_2, radii, show=False, label="Winter warm BOB -> IAC", line="-"
)
../../_images/3e83ee075baaabb003027526b72221b0f70d285a744b408d3110bbc4be15ea43.png

4. Ripley’s K function#

def ripleys_k_function(
    points1: np.ndarray,
    points2: np.ndarray,
    radii: np.ndarray,
    area=None,
    edge_correction=False,
) -> np.ndarray:
    """
    Computes Ripley's K function for a set of 2D points.

    Parameters:
    - points1: array of shape (n_points, 2) representing the coordinates of the first set of points
    - points2: array of shape (n_points, 2) representing the coordinates of the second set of points
    - radii: array-like of radii at which to evaluate K
    - area: total area of the observation window; if None, calculated from bounding box
    - edge_correction: if True, applies basic border edge correction

    Returns:
    - ks: K-function values at each radius
    """

    #### Get the number of points
    n1 = len(points1)
    n2 = len(points2)
    ks = np.zeros_like(radii, dtype=float)
    dists = distance_matrix(points1, points2)

    #### Compute area if not provided
    allpoints = np.vstack((points1, points2))
    max_x, max_y = np.max(allpoints, axis=0)

    if area is None:
        min_x, min_y = np.min(allpoints, axis=0)
        area = (max_x - min_x) * (max_y - min_y)
    else:
        min_x = 0
        min_y = 0
        max_x = np.max([max_x, np.sqrt(area)])
        max_y = np.max([max_y, np.sqrt(area)])

    if edge_correction:
        # Determine FOV area
        square = box(min_x, min_y, max_x, max_y)
        area_correction = np.zeros_like(dists)

        for i in range(n1):
            for j in range(n2):
                circle = Point(points1[i]).buffer(dists[i][j])
                if circle.area == 0 or circle.intersection(square).area == 0:
                    continue  # Avoid division by zero
                area_correction[i][j] = circle.area / circle.intersection(square).area

    if points1 is points2 or np.shares_memory(points1, points2):
        np.fill_diagonal(dists, np.inf)

    for i, r in enumerate(radii):
        within_r = dists < r

        if edge_correction:
            within_r = within_r * area_correction

        count_within_r = np.sum(within_r)

        ks[i] = (area / (n1 * n2)) * count_within_r

    return ks

Define radii for which to compute Ripley’s K function#

✍️ Exercise: Think: Should these be the same maximum radius as provided to the nearest neighbor function?#

If yes, why?
If no, why not?

k_radii = np.arange(0, 5.01, 0.5)  # radii for which to compute the K-function

Compute K for the fall samples#

k_fall_med = ripleys_k_function(fall_med_iac, fall_med_bob, radii=k_radii, area=400)
k_fall_cold = ripleys_k_function(fall_cold_iac, fall_cold_bob, radii=k_radii, area=400)
k_fall_warm = ripleys_k_function(fall_warm_iac, fall_warm_bob, radii=k_radii, area=400)

Compute K for the winter samples#

k_winter_warm = ripleys_k_function(
    winter_warm_iac, winter_warm_bob, radii=k_radii, area=400
)
k_winter_medium = ripleys_k_function(
    winter_medium_iac, winter_medium_bob, radii=k_radii, area=400
)
k_winter_cold = ripleys_k_function(
    winter_cold_iac, winter_cold_bob, radii=k_radii, area=400
)

Here, we change the order to BOB -> IAC for k_winter_medium#

k_winter_medium_2 = ripleys_k_function(
    winter_medium_bob, winter_medium_iac, radii=k_radii, area=400
)

print(k_winter_medium_2 == k_winter_medium)
[ True  True  True  True  True  True  True  True  True  True  True]

Inspect the results#

Hide code cell content

def plot_ripleys_k(
    k: np.ndarray,
    radii: np.ndarray,
    label: str = "uniform",
    show: bool = True,
    line: str = "-",
) -> None:
    """
    Plots Ripley's K function.
    Parameters:
    - k: array of K function values
    - radii: array of radii at which K is evaluated
    - label: label for the plot
    - show: whether to show the plot
    - line: line style for the plot
    """
    plt.plot(radii, k, color="k", label=f"{label}, no edge correction", linestyle=line)
    plt.xlabel("r")
    plt.ylabel("K(r)")
    plt.legend()
    if show:
        plt.show()

Fall samples#

plot_ripleys_k(k_fall_cold, k_radii, label="Fall cold", show=False, line=":")
plot_ripleys_k(k_fall_med, k_radii, label="Fall medium", show=False, line="--")
plot_ripleys_k(k_fall_warm, k_radii, label="Fall warm", show=False, line="-")
../../_images/ea43ff76e29683d0b28783f54e44e60905800912c399c7728f8d5a035901e2f0.png

Winter samples#

plot_ripleys_k(k_winter_cold, k_radii, label="Winter cold", line=":", show=False)
plot_ripleys_k(k_winter_medium, k_radii, label="Winter medium", line="--", show=False)
plot_ripleys_k(k_winter_warm, k_radii, label="Winter warm", show=False)
../../_images/b589c30f50d9e30bdf7a1300992766d3ed51797dd92ef5e195fbbba731a8becb.png

4.1 Ripley’s K with edge correction#

Hide code cell content

def plot_ripleys_k_ec(
    k: np.ndarray, k_corrected: np.ndarray, radii: np.ndarray, label: str = "uniform"
) -> None:
    """
    Plots Ripley's K function with and without edge correction.
    Parameters:
    - k: array of K function values without edge correction
    - k_corrected: array of K function values with edge correction
    - radii: array of radii at which K function is evaluated
    - label: label for the plot, default is "uniform"
    """
    plt.plot(
        radii,
        k,
        color="k",
        label=f"{label}, no edge correction",
        linestyle="-",
        alpha=0.5,
    )
    plt.plot(
        radii,
        k_corrected,
        color="k",
        label=f"{label}, edge correction",
        linestyle="-.",
    )
    plt.title("Ripley's K function")
    plt.legend()
    plt.xlabel("r")
    plt.ylabel("K(r)")
    # plt.show()

Compute the fall samples#

k_fall_med_ec = ripleys_k_function(
    fall_med_iac, fall_med_bob, radii=k_radii, area=400, edge_correction=True
)
k_fall_warm_ec = ripleys_k_function(
    fall_warm_iac, fall_warm_bob, radii=k_radii, area=400, edge_correction=True
)

k_fall_cold_ec = ripleys_k_function(
    fall_cold_iac, fall_cold_bob, radii=k_radii, area=400, edge_correction=True
)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[36], line 4
      1 k_fall_med_ec = ripleys_k_function(
      2     fall_med_iac, fall_med_bob, radii=k_radii, area=400, edge_correction=True
      3 )
----> 4 k_fall_warm_ec = ripleys_k_function(
      5     fall_warm_iac, fall_warm_bob, radii=k_radii, area=400, edge_correction=True
      6 )
      8 k_fall_cold_ec = ripleys_k_function(
      9     fall_cold_iac, fall_cold_bob, radii=k_radii, area=400, edge_correction=True
     10 )

Cell In[27], line 49, in ripleys_k_function(points1, points2, radii, area, edge_correction)
     47 for j in range(n2):
     48     circle = Point(points1[i]).buffer(dists[i][j])
---> 49     if circle.area == 0 or circle.intersection(square).area == 0:
     50         continue  # Avoid division by zero
     51     area_correction[i][j] = circle.area / circle.intersection(square).area

File ~/work/bobiac/bobiac/.venv/lib/python3.12/site-packages/shapely/decorators.py:173, in deprecate_positional.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    171 @wraps(func)
    172 def wrapper(*args, **kwargs):
--> 173     result = func(*args, **kwargs)
    175     n = len(args)
    176     if n > warn_from:

File ~/work/bobiac/bobiac/.venv/lib/python3.12/site-packages/shapely/geometry/base.py:674, in BaseGeometry.intersection(self, other, grid_size)
    668 @deprecate_positional(["grid_size"], category=DeprecationWarning)
    669 def intersection(self, other, grid_size=None):
    670     """Return the intersection of the geometries.
    671 
    672     Refer to `shapely.intersection` for full documentation.
    673     """
--> 674     return shapely.intersection(self, other, grid_size=grid_size)

File ~/work/bobiac/bobiac/.venv/lib/python3.12/site-packages/shapely/decorators.py:173, in deprecate_positional.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    171 @wraps(func)
    172 def wrapper(*args, **kwargs):
--> 173     result = func(*args, **kwargs)
    175     n = len(args)
    176     if n > warn_from:

File ~/work/bobiac/bobiac/.venv/lib/python3.12/site-packages/shapely/decorators.py:88, in multithreading_enabled.<locals>.wrapped(*args, **kwargs)
     86     for arr in array_args:
     87         arr.flags.writeable = False
---> 88     return func(*args, **kwargs)
     89 finally:
     90     for arr, old_flag in zip(array_args, old_flags):

File ~/work/bobiac/bobiac/.venv/lib/python3.12/site-packages/shapely/set_operations.py:168, in intersection(a, b, grid_size, **kwargs)
    164         raise ValueError("grid_size parameter only accepts scalar values")
    166     return lib.intersection_prec(a, b, grid_size, **kwargs)
--> 168 return lib.intersection(a, b, **kwargs)

KeyboardInterrupt: 

Compute the winter samples#

k_winter_warm_ec = ripleys_k_function(
    winter_warm_iac, winter_warm_bob, radii=k_radii, area=400, edge_correction=True
)

k_winter_medium_ec = ripleys_k_function(
    winter_medium_iac, winter_medium_bob, radii=k_radii, area=400, edge_correction=True
)

k_winter_cold_ec = ripleys_k_function(
    winter_cold_iac, winter_cold_bob, radii=k_radii, area=400, edge_correction=True
)

Plot a fall sample with edge correction#

plot_ripleys_k_ec(k_fall_warm, k_fall_warm_ec, k_radii, label="Fall warm")

Plot a winter sample with edge correction#

plot_ripleys_k_ec(k_winter_cold, k_winter_cold_ec, k_radii, label="Winter cold")

5. Validations - the null distribution#

Calculate the null distribution of the nearest neighbor function#

def getnulldist(n: int, radii: np.ndarray, area: float) -> np.ndarray:
    """
    Computes the expected nearest neighbor distances under a null model for a set of 2D points.
    Parameters:
    - points2: array of shape (n_points, 2) representing the coordinates of the points to compute nearest n
    neighbor distances to
    - radii: array-like of radii at which to evaluate the null model

    Returns:
    - mu0: array of expected nearest neighbor distances at each radius
    """

    mu0 = np.empty(radii.shape, dtype=float)

    for i, r in enumerate(radii):
        mu0[i] = 1 - np.exp(-(n / area) * np.pi * (r**2))

    return mu0

✍️ Exercise: Try different values for n and area#

You can plot the cell directly below to inspect the effect on nulldist.
Try out different values.
Think what would be the correct values for the fall samples!

n = 1345345  # number of points in the dataset
area = 4056780  # area of the FOV

nulldist = getnulldist(
    n=n, area=area, radii=radii
)  # These are not good values for n and area. Change them!

Plot the fall samples#

plot_nn_function(s_fall_cold, radii, show=False, label="Fall cold", line=":")
plot_nn_function(s_fall_med, radii, show=False, label="Fall medium", line="--")
plot_nn_function(
    s_fall_warm, radii, show=False, label="Fall warm", line="-", nulldist=nulldist
)

Plot the winter samples#

plot_nn_function(s_winter_cold, radii, show=False, label="Winter cold", line=":")
plot_nn_function(s_winter_medium, radii, show=False, label="Winter medium", line="--")
plot_nn_function(
    s_winter_warm, radii, show=True, label="Winter warm", nulldist=nulldist
)

6. Validation - Monte-Carlo based null-hypothesis testing#

✍️ Exercise, code along: Blindly throw darts at a 20 x 20 cm board#

seed = 5
rng = np.random.default_rng(seed)
board_x = 20
board_y = 20
n_darts = 500
x = rng.uniform(0, board_x, size=n_darts)
y = rng.uniform(0, board_y, size=n_darts)

plt.scatter(x, y)
plt.xlabel("x [au]")
plt.ylabel("y [au]")
plt.title("Darts on a board")
plt.gca().set_aspect("equal")

6.1 Simulate under the null hypothesis#

def sample_uniform_points_batch(
    ndraw: int, num_points: int = 400, x_max: int = 10, y_max: int = 10, seed: int = 42
) -> np.ndarray:
    """
    Generate *ndraw* independent batches of 2-D points drawn from independent
    **uniform random** distributions on the rectangle ``[0, x_max] × [0, y_max]``.

    Unlike a “uniform grid,” the points are *randomly* scattered—each position
    inside the rectangle has equal probability of being chosen.

    Parameters
    ----------
    ndraw : int
        Number of batches to generate.
    num_points : int
        Number of points in each batch.
    x_max : int
        Maximum x-coordinate value.
    y_max : int
        Maximum y-coordinate value.
    seed : int
        Random seed for reproducibility.

    Returns
    -------
    np.ndarray
        Array of shape (ndraw, num_points, 2) containing the generated points.
    """
    rng = np.random.default_rng(seed)

    x = rng.uniform(0, x_max, size=(ndraw, num_points, 1))
    y = rng.uniform(0, y_max, size=(ndraw, num_points, 1))

    return np.concatenate([x, y], axis=-1)
ndraw = 1000  # Sufficiently large for statistics.
num_points_c1 = len(fall_med_iac)  # Same number of points as c1
num_points_c2 = len(fall_med_bob)  # Same number of points as c2

# Generate random points for multiple samples, channel 1
points_multiple_c1 = sample_uniform_points_batch(
    ndraw, num_points=num_points_c1, x_max=20, y_max=20, seed=42
)

# channel 2
points_multiple_c2 = sample_uniform_points_batch(
    ndraw, num_points=num_points_c2, x_max=20, y_max=20, seed=43
)

Inspect the random results#

def plotpoints(points: np.ndarray, title: str = "Monte carlo simulated points") -> None:
    """Plots a set of 2D points.
    Parameters:
    - points: array of shape (n_points, 2) representing the coordinates of the points
    - title: title of the plot

    """
    plt.scatter(points[:, 0], points[:, 1], c="k", s=10)
    plt.title(label=title)

    plt.xlabel("x [au]")
    plt.ylabel("y [au]")
    ax = plt.gca()
    ax.set_aspect("equal", adjustable="box")
sample_i = 999
print(points_multiple_c1[sample_i].shape)
print(points_multiple_c2[sample_i].shape)


plotpoints(points_multiple_c1[sample_i])
plt.show()
plotpoints(points_multiple_c2[sample_i])
plt.show()

6.2 Validate - Mean nearest neighbor distance#

Calculate the mean nearest neighbor distance for each random dataset#

nn_multiple = np.zeros(ndraw, dtype=float)
for i in range(ndraw):  # for all randomly generated samples
    _, mean_i = meanNN(
        points_multiple_c1[i], points_multiple_c2[i]
    )  # calculate their mean
    nn_multiple[i] = mean_i

Define significance level for later testing#

alpha = 5  # significance level. Only 5% of simulations under the null hypothesis will lie outside a 95% significance level
bound_low = alpha / 2
bound_upper = 100 - bound_low

# Compute percentiles for mean nearest neighbor distance of the simulated points
percentile_low = np.percentile(
    nn_multiple, bound_low
)  # Only alpha/2 % of simulations lie below this value
percentile_high = np.percentile(
    nn_multiple, bound_upper
)  # Only alpha/2 % of simulations lie above this value

Fall samples#

plt.hist(nndist_fall_med, label="Fall medium", alpha=0.5, bins=25)
plt.hist(nndist_fall_cold, label="Fall cold", alpha=0.5, bins=25)
plt.hist(nndist_fall_warm, label="Fall warm", alpha=0.5, bins=25)
plt.xlabel("distance to nearest neighbor [a.u.]")
plt.axvline(meannn_fall_med, c="k", linestyle="--", label="Fall medium")
plt.axvline(meannn_fall_cold, c="k", linestyle=":", label="Fall cold")
plt.axvline(meannn_fall_warm, c="k", linestyle="-", label="Fall warm")

# Now we display the confidence bounds
plt.axvspan(
    percentile_low,
    percentile_high,
    color="gray",
    alpha=0.5,
    label=f"confidence band {bound_low}-{bound_upper}%",
)
plt.legend()

Winter samples#

plt.hist(nndist_winter_cold, label="Winter cold", alpha=0.5, bins=25)
plt.hist(nndist_winter_warm, label="Winter warm", alpha=0.5, bins=25)
plt.xlabel("distance to nearest neighbor [a.u.]")
plt.axvline(meannn_winter_cold, c="k", linestyle=":", label="Winter cold")
plt.axvline(meannn_winter_warm, c="k", linestyle="-", label="Winter warm")
plt.axvspan(
    percentile_low,
    percentile_high,
    color="gray",
    alpha=0.5,
    label=f"confidence band {bound_low}-{bound_upper}%",
)
plt.legend()

6.3 Validate - Nearest neighbor function#

Plot confidence intervals for the nearest neighbor function:#

Hide code cell content

def plot_envelope(
    s: np.ndarray,
    s_multiple: np.ndarray,
    radii: np.ndarray,
    alpha: float = 5,
    label: str = "data",
    metric: str = "K(r)",
    show: bool = True,
    linestyle: str = "-",
):
    """
    Plots the envelope of a metric (e.g., Nearest neighbor function, or K-function) with confidence intervals.
    Parameters:
    - s: array of metric values for the observed data
    - s_multiple: array of metric values for multiple realizations (i.e. of monte Carlo simulations)
    - radii: array of radii at which the metric is evaluated
    - alpha: significance level for the confidence interval (default is 5)
    - label: label for the observed data in the plot
    - metric: name of the metric being plotted (default is "K(r)" for ripley's K function)
    - show: whether to show the plot (default is True).
    If False, the plot is not shown but can be saved or further processed.
    """
    if show:  # show confidence intervals
        ndraw = len(s_multiple)
        s_multiple_mean = np.mean(s_multiple, axis=0)
        bound_low = alpha / 2
        bound_upper = 100 - bound_low
        percentiles_low = np.percentile(s_multiple, bound_low, axis=0)
        percentiles_high = np.percentile(s_multiple, bound_upper, axis=0)

        plt.plot(  # plot the mean of the multiple realizations
            radii,
            s_multiple_mean,
            color="grey",
            label=f"mean of {ndraw} realizations",
            zorder=0,
        )

        plt.fill_between(  # fill the area between the percentiles
            radii,
            percentiles_low,
            percentiles_high,
            color="grey",
            alpha=0.3,
            label=f"{bound_low}-{bound_upper}% quantile range",
        )

    plt.plot(
        radii, s, label=label, color="k", linestyle=linestyle
    )  # plot the observed data
    plt.legend()
    plt.xlabel("r")
    plt.ylabel(f"{metric}")
    # if show:
    #     plt.show()

Compute the nearest neighbor function for all random samples#

s_multiple = np.zeros([ndraw, len(radii)], dtype=float)
for i in range(ndraw):  # for all randomly generated samples
    s_multiple[i] = nearest_neighbor_function(
        points_multiple_c1[i], points_multiple_c2[i], radii
    )

Fall samples#

plot_envelope(
    s_fall_cold,
    s_multiple,
    radii=radii,
    metric="S(r)",
    label="Fall cold",
    linestyle=":",
    show=False,
)
plot_envelope(
    s_fall_med,
    s_multiple,
    radii=radii,
    metric="S(r)",
    label="Fall medium",
    linestyle="--",
    show=False,
)

plot_envelope(
    s_fall_warm,
    s_multiple,
    radii=radii,
    metric="S(r)",
    linestyle="-",
    label="Fall warm",
    show=True,
)

Winter samples#

plot_envelope(
    s_winter_cold,
    s_multiple,
    radii=radii,
    metric="S(r)",
    label="Winter cold",
    linestyle=":",
    show=False,
)

plot_envelope(
    s_winter_warm,
    s_multiple,
    radii=radii,
    metric="S(r)",
    label="Winter warm",
    show=False,
)

6.4 Validate - Ripley’s K#

Here, we can reuse the sample plotting function as defined for the nearest neighbor function

Compute Ripley’s K for all randomly generated samples#

ks_multiple = np.zeros([ndraw, len(k_radii)], dtype=float)
for i in range(ndraw):
    ks_multiple[i] = ripleys_k_function(
        points_multiple_c1[i],
        points_multiple_c2[i],
        radii=k_radii,
        area=400,
        edge_correction=False,
    )

Fall samples#

plot_envelope(
    k_fall_cold,
    ks_multiple,
    radii=k_radii,
    metric="K(r)",
    label="Fall cold",
    show=False,
    linestyle=":",
)
plot_envelope(
    k_fall_med,
    ks_multiple,
    radii=k_radii,
    metric="K(r)",
    label="Fall medium",
    linestyle="--",
    show=False,
)

plot_envelope(
    k_fall_warm, ks_multiple, radii=k_radii, metric="K(r)", label="Fall warm", show=True
)

Winter samples#

plot_envelope(
    k_winter_cold,
    ks_multiple,
    radii=k_radii,
    metric="K(r)",
    label="Winter cold",
    linestyle=":",
    show=False,
)

plot_envelope(
    k_winter_warm,
    ks_multiple,
    radii=k_radii,
    metric="K(r)",
    label="Winter warm",
    show=False,
)