Source code for maup.intersections

import pandas
from geopandas import GeoDataFrame

from .crs import require_same_crs
from .indexed_geometries import IndexedGeometries
from .indices import get_geometries_with_range_index



[docs]
@require_same_crs
def intersections(sources, targets, output_type="geoseries", area_cutoff=None):
    """Compute every nonempty intersection between two sets of geometries.

    With the default ``output_type``, the result is a
    :class:`~geopandas.GeoSeries` carrying a ``("source", "target")``
    MultiIndex: the entry at index *(i, j)* is the intersection of
    ``sources[i]`` and ``targets[j]`` (present only when it is not empty).

    With ``output_type == "geodataframe"``, the result is instead a
    :class:`~geopandas.GeoDataFrame` with a fresh ``0..n-1`` index and
    ``"source"``, ``"target"`` and ``"geometry"`` columns, where the first two
    hold the indices *i*, *j* of the intersected geometries.

    In both forms the rows are ordered by source, then target, and the
    result uses the CRS of ``sources``.

    :param sources: geometries
    :type sources: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    :param targets: geometries
    :type targets: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    :param output_type: ``"geodataframe"`` selects the GeoDataFrame form; any
        other value (including the default ``"geoseries"``) selects the
        MultiIndexed GeoSeries form
    :type output_type: str
    :param area_cutoff: (optional) if provided, only return intersections with
        area strictly greater than ``area_cutoff``
    :type area_cutoff: Number or None
    :rtype: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    """
    # NOTE(review): presumably these helpers give both inputs a positional
    # 0..n-1 index so the integers yielded below can be looked up in the
    # original indexes -- confirm against .indices.
    positional_sources = get_geometries_with_range_index(sources)
    positional_targets = get_geometries_with_range_index(targets)
    source_lookup = IndexedGeometries(positional_sources)

    rows = []
    overlaps = source_lookup.enumerate_intersections(positional_targets)
    for target_pos, source_pos, shape in overlaps:
        # The enumeration yields the target position first; swap the pair so
        # that each row reads (source label, target label, geometry).
        rows.append((sources.index[source_pos], targets.index[target_pos], shape))

    frame = GeoDataFrame(
        rows, columns=["source", "target", "geometry"], crs=sources.crs
    )
    frame = frame.sort_values(by=["source", "target"]).reset_index(drop=True)

    series = frame.set_index(["source", "target"]).geometry
    series.sort_index(inplace=True)

    if area_cutoff is not None:
        # Apply the same strict area threshold to both output forms.
        frame = frame[frame.area > area_cutoff].reset_index(drop=True)
        series = series[series.area > area_cutoff]

    if output_type == "geodataframe":
        return frame
    return series




[docs]
def prorate(relationship, data, weights, aggregate_by="sum"):
    """
    Prorate data from one set of geometries to another, using their
    `~maup.intersections` or an assignment.

    Each row of ``relationship`` receives the data value of its source
    multiplied by that row's weight. When the rows carry a MultiIndex
    (the intersections case), the weighted values are then aggregated per
    ``"target"`` level; otherwise (the assignment case) the weighted values
    are returned as they are, indexed like ``relationship``.

    :param relationship: the :func:`~maup.intersections` of the geometries you are
        getting data from (sources) and the geometries you are moving the data
        to; or, a series assigning sources to targets. A multi-level index
        must have levels named ``"source"`` and ``"target"``.
    :type relationship: :class:`geopandas.GeoSeries` or :class:`pandas.Series`
    :param data: the data you want to move (must be indexed the same as
        the source geometries)
    :type data: :class:`pandas.Series` or :class:`pandas.DataFrame`
    :param weights: the weights to use when prorating from the sources to the
        rows of ``relationship``; they are reindexed to match ``relationship``
    :type weights: :class:`pandas.Series`
    :param aggregate_by: (optional) the function (or function name) to use for
        aggregating from the rows of ``relationship`` to the targets. The
        default is ``"sum"``.
    :returns: the prorated data, of the same kind (Series or DataFrame) as
        ``data``
    :raises TypeError: if ``data`` is neither a Series nor a DataFrame
    """
    if relationship.index.nlevels > 1:
        # Intersections: the source of each row is stored in the index itself.
        source_of_row = relationship.index.get_level_values("source").to_series(
            index=relationship.index
        )
    else:
        # Assignment: the values of the series already name each row's source.
        source_of_row = relationship

    row_weights = weights.reindex_like(relationship)

    def _spread(values):
        # Look up the source's value for each row and scale it by the weight.
        return source_of_row.map(values) * row_weights

    if isinstance(data, pandas.DataFrame):
        pieces = pandas.DataFrame(
            {name: _spread(data[name]) for name in data.columns}
        )
    elif isinstance(data, pandas.Series):
        pieces = _spread(data)
    else:
        raise TypeError("Data must be a Series or DataFrame")

    if not isinstance(pieces.index, pandas.MultiIndex):
        return pieces

    return pieces.groupby(level="target").agg(aggregate_by)