import pandas
from geopandas import GeoDataFrame
from .crs import require_same_crs
from .indexed_geometries import IndexedGeometries
from .indices import get_geometries_with_range_index
@require_same_crs
def intersections(sources, targets, output_type="geoseries", area_cutoff=None):
    """Compute all of the nonempty intersections between two sets of geometries.

    By default, the returned :class:`~geopandas.GeoSeries` has a MultiIndex with
    levels ``("source", "target")``, where the geometry at index *(i, j)* is the
    intersection of ``sources[i]`` and ``targets[j]`` (if it is not empty).

    If ``output_type == "geodataframe"``, the result is instead a range-indexed
    :class:`~geopandas.GeoDataFrame` with ``"source"``, ``"target"`` and
    ``"geometry"`` columns, where ``"source"`` and ``"target"`` hold the indices
    *i*, *j* of the intersecting pair. Either way the rows are sorted by
    ``(source, target)`` and carry the CRS of ``sources``.

    :param sources: geometries
    :type sources: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    :param targets: geometries
    :type targets: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    :param output_type: (optional) ``"geodataframe"`` to get the flat
        GeoDataFrame form; any other value (default ``"geoseries"``) returns
        the MultiIndexed GeoSeries
    :type output_type: str
    :param area_cutoff: (optional) if provided, only return intersections with
        area strictly greater than ``area_cutoff``
    :type area_cutoff: Number or None
    :rtype: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    """
    # Work on range-indexed copies so that the positions reported by the
    # spatial index can be translated back to the callers' own index labels.
    reindexed_sources = get_geometries_with_range_index(sources)
    reindexed_targets = get_geometries_with_range_index(targets)
    spatially_indexed_sources = IndexedGeometries(reindexed_sources)

    records = [
        # Flip i, j to j, i so that the index is ["source", "target"]:
        # i is looked up in targets and j in sources.
        (sources.index[j], targets.index[i], geometry)
        for i, j, geometry in spatially_indexed_sources.enumerate_intersections(
            reindexed_targets
        )
    ]
    df = GeoDataFrame(
        records, columns=["source", "target", "geometry"], crs=sources.crs
    )
    df = df.sort_values(by=["source", "target"]).reset_index(drop=True)

    # Reassign instead of sorting in place: the Series comes straight out of a
    # frame, and in-place mutation of such a column is best avoided.
    geometries = df.set_index(["source", "target"]).geometry
    geometries = geometries.sort_index()

    if area_cutoff is not None:
        df = df[df.area > area_cutoff].reset_index(drop=True)
        geometries = geometries[geometries.area > area_cutoff]

    return df if output_type == "geodataframe" else geometries
def prorate(relationship, data, weights, aggregate_by="sum"):
    """
    Prorate data from one set of geometries to another, using their
    `~maup.intersections` or an assignment.

    Each row of ``relationship`` is matched to a source unit, the source's
    value(s) in ``data`` are looked up and multiplied by that row's weight.
    When ``relationship`` is MultiIndexed (an intersections result), the
    weighted pieces are then aggregated per ``"target"`` level; when it is a
    plain assignment series, the weighted pieces are returned as they are,
    indexed like ``relationship``.

    :param relationship: the :func:`~maup.intersections` of the geometries you are
        getting data from (sources) and the geometries you are moving the data
        to; or, a series assigning sources to targets. A MultiIndexed
        relationship must have levels named ``"source"`` and ``"target"``.
    :type relationship: :class:`geopandas.GeoSeries` or :class:`pandas.Series`
    :param data: the data you want to move (must be indexed the same as
        the source geometries)
    :type data: :class:`pandas.Series` or :class:`pandas.DataFrame`
    :param weights: the weights to use when prorating; they are reindexed to
        match the index of ``relationship``
    :type weights: :class:`pandas.Series`
    :param aggregate_by: (optional) the aggregation passed to ``.agg`` when
        combining the pieces belonging to each target. The default is ``"sum"``.
    :type aggregate_by: str or function
    :returns: the prorated data, of the same kind (Series or DataFrame) as ``data``
    :rtype: :class:`pandas.Series` or :class:`pandas.DataFrame`
    :raises TypeError: if ``data`` is neither a Series nor a DataFrame
    """
    # Fail fast on unsupported input before doing any index work.
    if not isinstance(data, (pandas.DataFrame, pandas.Series)):
        raise TypeError("Data must be a Series or DataFrame")

    if relationship.index.nlevels > 1:
        # Intersections: the source of each piece is stored in the index, so
        # turn that level into a Series aligned with the full MultiIndex.
        source_assignment = relationship.index.get_level_values("source").to_series(
            index=relationship.index
        )
    else:
        # Assignment: the values of the series already name the source.
        source_assignment = relationship

    weights = weights.reindex_like(relationship)

    if isinstance(data, pandas.DataFrame):
        disaggregated = pandas.DataFrame(
            {
                column: source_assignment.map(data[column]) * weights
                for column in data.columns
            }
        )
    else:
        disaggregated = source_assignment.map(data) * weights

    # Only an intersections-style (MultiIndexed) result has several pieces per
    # target that need to be combined.
    if isinstance(disaggregated.index, pandas.MultiIndex):
        return disaggregated.groupby(level="target").agg(aggregate_by)
    return disaggregated