Source code for maup.intersections

import pandas
from geopandas import GeoDataFrame

from .crs import require_same_crs
from .indexed_geometries import IndexedGeometries
from .indices import get_geometries_with_range_index


@require_same_crs
def intersections(sources, targets, output_type="geoseries", area_cutoff=None):
    """Compute every nonempty intersection between two sets of geometries.

    With the default ``output_type``, the result is a
    :class:`~geopandas.GeoSeries` carrying a ``("source", "target")``
    MultiIndex: the geometry at index *(i, j)* is the intersection of
    ``sources[i]`` and ``targets[j]`` (when it is not empty).

    With ``output_type="geodataframe"``, the result is a range-indexed
    :class:`~geopandas.GeoDataFrame` whose ``"source"`` and ``"target"``
    columns hold the indices *i* and *j* of the intersected geometries and
    whose ``"geometry"`` column holds the intersection itself.

    In both forms the rows are ordered by ``(source, target)`` and use the
    CRS of ``sources``.

    :param sources: geometries
    :type sources: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    :param targets: geometries
    :type targets: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    :param output_type: ``"geodataframe"`` selects the GeoDataFrame form;
        any other value selects the MultiIndexed GeoSeries form
    :type output_type: str
    :param area_cutoff: (optional) if provided, only return intersections
        with area strictly greater than ``area_cutoff``
    :type area_cutoff: Number or None
    :rtype: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
    """
    source_geometries = get_geometries_with_range_index(sources)
    target_geometries = get_geometries_with_range_index(targets)
    source_lookup = IndexedGeometries(source_geometries)

    # enumerate_intersections yields (target position, source position,
    # geometry); swap the first two so each row reads (source, target, geometry)
    # and translate the positions back to the callers' original index labels.
    rows = []
    for target_pos, source_pos, piece in source_lookup.enumerate_intersections(
        target_geometries
    ):
        rows.append((sources.index[source_pos], targets.index[target_pos], piece))

    key_columns = ["source", "target"]
    frame = GeoDataFrame(rows, columns=key_columns + ["geometry"], crs=sources.crs)
    frame = frame.sort_values(by=key_columns).reset_index(drop=True)
    series = frame.set_index(key_columns).geometry.sort_index()

    if area_cutoff is not None:
        # Apply the same strict area threshold to both output forms.
        frame = frame[frame.area > area_cutoff].reset_index(drop=True)
        series = series[series.area > area_cutoff]

    if output_type == "geodataframe":
        return frame
    return series
def prorate(relationship, data, weights, aggregate_by="sum"):
    """Prorate data from one set of geometries to another.

    The link between the two sets is given either by their
    :func:`~maup.intersections` or by an assignment.

    :param relationship: the :func:`~maup.intersections` of the geometries
        you are getting data from (sources) and the geometries you are moving
        the data to (a series with a ``("source", "target")`` MultiIndex);
        or, a single-level series assigning sources to targets
    :type relationship: :class:`geopandas.GeoSeries` or :class:`pandas.Series`
    :param data: the data you want to move (must be indexed the same as the
        source geometries)
    :type data: :class:`pandas.Series` or :class:`pandas.DataFrame`
    :param weights: the weights to multiply the mapped data by; indexed like
        ``relationship``
    :type weights: :class:`pandas.Series`
    :param aggregate_by: (optional) the aggregation passed to ``.agg`` when
        collapsing the MultiIndexed pieces onto the ``"target"`` level.
        The default is ``"sum"``.
    :returns: the weighted data, grouped by ``"target"`` when the
        relationship is MultiIndexed; otherwise the weighted data indexed
        like ``relationship``, with no aggregation applied
    :rtype: :class:`pandas.Series` or :class:`pandas.DataFrame`
    :raises TypeError: if ``data`` is neither a Series nor a DataFrame
    """
    rel_index = relationship.index
    if rel_index.nlevels > 1:
        # Intersections: the source of each piece is the "source" level of the
        # MultiIndex, laid out as a series over that same MultiIndex.
        source_of = rel_index.get_level_values("source").to_series(index=rel_index)
    else:
        # Assignment: the series values already name the source of each row;
        # line the weights up with the assignment's index.
        source_of = relationship
        weights = weights.reindex_like(relationship)

    def _weighted(values):
        # Look up each row's source value, then scale it by that row's weight.
        return source_of.map(values) * weights

    if isinstance(data, pandas.DataFrame):
        pieces = pandas.DataFrame(
            {name: _weighted(data[name]) for name in data.columns}
        )
    elif isinstance(data, pandas.Series):
        pieces = _weighted(data)
    else:
        raise TypeError("Data must be a Series or DataFrame")

    if not isinstance(pieces.index, pandas.MultiIndex):
        return pieces
    return pieces.groupby(level="target").agg(aggregate_by)