gdal/alg/polygonize.cpp

/******************************************************************************
 * Project:  GDAL
 * Purpose:  Raster to Polygon Converter
 * Author:   Frank Warmerdam, warmerdam@pobox.com
 *
 ******************************************************************************
 * Copyright (c) 2008, Frank Warmerdam
 * Copyright (c) 2009-2020, Even Rouault <even dot rouault at spatialys.com>
 *
 * SPDX-License-Identifier: MIT
 ****************************************************************************/

#include "cpl_port.h"
#include "gdal_alg.h"

#include <stddef.h>
#include <stdio.h>
#include <cstdlib>
#include <string.h>

#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <utility>
#include <vector>

#include "gdal_alg_priv.h"
#include "gdal.h"
#include "ogr_api.h"
#include "ogr_core.h"
#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_progress.h"
#include "cpl_string.h"
#include "cpl_vsi.h"

#include "polygonize_polygonizer.h"

using namespace gdal::polygonizer;

/************************************************************************/
/*                          GPMaskImageData()                           */
/*                                                                      */
/*      Mask out image pixels to a special nodata value if the mask     */
/*      band is zero.                                                   */
/************************************************************************/

template <class DataType>
static CPLErr GPMaskImageData(GDALRasterBandH hMaskBand, GByte *pabyMaskLine,
                              int iY, int nXSize, DataType *panImageLine)

{
    // Read scanline iY of the mask band as bytes into the caller-provided
    // scratch buffer (nXSize entries).
    const CPLErr eReadErr =
        GDALRasterIO(hMaskBand, GF_Read, 0, iY, nXSize, 1, pabyMaskLine,
                     nXSize, 1, GDT_Byte, 0, 0);

    if (eReadErr == CE_None)
    {
        // Every pixel whose mask byte is zero is overwritten in place with
        // the nodata marker; all other pixels are left untouched.
        for (int iX = 0; iX < nXSize; ++iX)
        {
            if (pabyMaskLine[iX] != 0)
                continue;
            panImageLine[iX] = GP_NODATA_MARKER;
        }
    }

    // On a read failure the image line is not modified and the I/O error is
    // handed back to the caller.
    return eReadErr;
}

/************************************************************************/
/*                           GDALPolygonizeT()                          */
/************************************************************************/

// Worker shared by GDALPolygonize() and GDALFPolygonize().
//
// DataType is the in-memory pixel type the source band is read as (eDT is the
// GDALDataType handed to GDALRasterIO() for those reads) and EqualityTest is
// forwarded to the polygon enumerator.
//
// The raster is scanned twice, one scanline at a time:
//   - pass 1 (reported as 0% - 10% of the progress) enumerates the connected
//     regions and builds the polygon id map;
//   - pass 2 (10% - 100%) enumerates again, translates the ids through the
//     map of pass 1 and feeds them to the Polygonizer, which writes to
//     hOutLayer through an OGRPolygonWriter.
//
// Returns CE_None on success, CE_Failure on allocation failure, user
// interruption or polygonizer failure, or the error returned by the raster
// I/O / polygon writer.
template <class DataType, class EqualityTest>
static CPLErr GDALPolygonizeT(GDALRasterBandH hSrcBand,
                              GDALRasterBandH hMaskBand, OGRLayerH hOutLayer,
                              int iPixValField, char **papszOptions,
                              GDALProgressFunc pfnProgress, void *pProgressArg,
                              GDALDataType eDT)

{
    VALIDATE_POINTER1(hSrcBand, "GDALPolygonize", CE_Failure);
    VALIDATE_POINTER1(hOutLayer, "GDALPolygonize", CE_Failure);

    if (pfnProgress == nullptr)
        pfnProgress = GDALDummyProgress;

    // Only the presence of the option is tested, not its value.
    const int nConnectedness =
        CSLFetchNameValue(papszOptions, "8CONNECTED") ? 8 : 4;

    /* -------------------------------------------------------------------- */
    /*      Confirm our output layer will support feature creation.         */
    /* -------------------------------------------------------------------- */
    if (!OGR_L_TestCapability(hOutLayer, OLCSequentialWrite))
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Output feature layer does not appear to support creation "
                 "of features in GDALPolygonize().");
        return CE_Failure;
    }

    /* -------------------------------------------------------------------- */
    /*      Allocate working buffers.                                       */
    /* -------------------------------------------------------------------- */
    const int nXSize = GDALGetRasterBandXSize(hSrcBand);
    const int nYSize = GDALGetRasterBandYSize(hSrcBand);
    // The arm buffers below hold nXSize + 2 entries: make sure that sum
    // cannot overflow an int.
    if (nXSize > std::numeric_limits<int>::max() - 2)
    {
        CPLError(CE_Failure, CPLE_AppDefined, "Too wide raster");
        return CE_Failure;
    }

    DataType *panLastLineVal =
        static_cast<DataType *>(VSI_MALLOC2_VERBOSE(sizeof(DataType), nXSize));
    DataType *panThisLineVal =
        static_cast<DataType *>(VSI_MALLOC2_VERBOSE(sizeof(DataType), nXSize));
    GInt32 *panLastLineId =
        static_cast<GInt32 *>(VSI_MALLOC2_VERBOSE(sizeof(GInt32), nXSize));
    GInt32 *panThisLineId =
        static_cast<GInt32 *>(VSI_MALLOC2_VERBOSE(sizeof(GInt32), nXSize));

    GByte *pabyMaskLine = static_cast<GByte *>(VSI_MALLOC_VERBOSE(nXSize));

    if (panLastLineVal == nullptr || panThisLineVal == nullptr ||
        panLastLineId == nullptr || panThisLineId == nullptr ||
        pabyMaskLine == nullptr)
    {
        CPLFree(panThisLineId);
        CPLFree(panLastLineId);
        CPLFree(panThisLineVal);
        CPLFree(panLastLineVal);
        CPLFree(pabyMaskLine);
        return CE_Failure;
    }

    /* -------------------------------------------------------------------- */
    /*      Get the geotransform, if there is one, so we can convert the    */
    /*      vectors into georeferenced coordinates.                         */
    /* -------------------------------------------------------------------- */
    double adfGeoTransform[6] = {0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
    bool bGotGeoTransform = false;
    const char *pszDatasetForGeoRef =
        CSLFetchNameValue(papszOptions, "DATASET_FOR_GEOREF");
    if (pszDatasetForGeoRef)
    {
        // Geotransform explicitly taken from another dataset. A dataset that
        // cannot be opened is not an error here: the identity transform is
        // used instead.
        GDALDatasetH hSrcDS = GDALOpen(pszDatasetForGeoRef, GA_ReadOnly);
        if (hSrcDS)
        {
            bGotGeoTransform =
                GDALGetGeoTransform(hSrcDS, adfGeoTransform) == CE_None;
            GDALClose(hSrcDS);
        }
    }
    else
    {
        GDALDatasetH hSrcDS = GDALGetBandDataset(hSrcBand);
        if (hSrcDS)
            bGotGeoTransform =
                GDALGetGeoTransform(hSrcDS, adfGeoTransform) == CE_None;
    }
    if (!bGotGeoTransform)
    {
        // Restore the identity transform in case the failed query touched
        // the array.
        adfGeoTransform[0] = 0;
        adfGeoTransform[1] = 1;
        adfGeoTransform[2] = 0;
        adfGeoTransform[3] = 0;
        adfGeoTransform[4] = 0;
        adfGeoTransform[5] = 1;
    }

    /* -------------------------------------------------------------------- */
    /*      The first pass over the raster is only used to build up the     */
    /*      polygon id map so we will know in advance what polygons are     */
    /*      what on the second pass.                                        */
    /* -------------------------------------------------------------------- */
    GDALRasterPolygonEnumeratorT<DataType, EqualityTest> oFirstEnum(
        nConnectedness);

    CPLErr eErr = CE_None;

    for (int iY = 0; eErr == CE_None && iY < nYSize; iY++)
    {
        eErr = GDALRasterIO(hSrcBand, GF_Read, 0, iY, nXSize, 1, panThisLineVal,
                            nXSize, 1, eDT, 0, 0);

        if (eErr == CE_None && hMaskBand != nullptr)
            eErr = GPMaskImageData(hMaskBand, pabyMaskLine, iY, nXSize,
                                   panThisLineVal);

        if (eErr != CE_None)
            break;

        // The first line has no predecessor to compare with.
        if (iY == 0)
            eErr = oFirstEnum.ProcessLine(nullptr, panThisLineVal, nullptr,
                                          panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;
        else
            eErr = oFirstEnum.ProcessLine(panLastLineVal, panThisLineVal,
                                          panLastLineId, panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;

        if (eErr != CE_None)
            break;

        // Swap lines.
        std::swap(panLastLineVal, panThisLineVal);
        std::swap(panLastLineId, panThisLineId);

        /* ---------------------------------------------------------------- */
        /*      Report progress, and support interrupts.                    */
        /* ---------------------------------------------------------------- */
        if (!pfnProgress(0.10 * ((iY + 1) / static_cast<double>(nYSize)), "",
                         pProgressArg))
        {
            CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
            eErr = CE_Failure;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Make a pass through the maps, ensuring every polygon id         */
    /*      points to the final id it should use, not an intermediate       */
    /*      value.                                                          */
    /* -------------------------------------------------------------------- */
    if (eErr == CE_None)
        oFirstEnum.CompleteMerges();

    /* -------------------------------------------------------------------- */
    /*      We will use a new enumerator for the second pass primarily      */
    /*      so we can preserve the first pass map.                          */
    /* -------------------------------------------------------------------- */
    GDALRasterPolygonEnumeratorT<DataType, EqualityTest> oSecondEnum(
        nConnectedness);

    OGRPolygonWriter<DataType> oPolygonWriter{hOutLayer, iPixValField,
                                              adfGeoTransform};
    Polygonizer<GInt32, DataType> oPolygonizer{-1, &oPolygonWriter};
    TwoArm *paoLastLineArm =
        static_cast<TwoArm *>(VSI_CALLOC_VERBOSE(sizeof(TwoArm), nXSize + 2));
    TwoArm *paoThisLineArm =
        static_cast<TwoArm *>(VSI_CALLOC_VERBOSE(sizeof(TwoArm), nXSize + 2));

    if (paoThisLineArm == nullptr || paoLastLineArm == nullptr)
    {
        // Both pointers are released by the common cleanup below.
        eErr = CE_Failure;
    }
    else
    {
        // Before the first raster line, every arm lies inside the outer
        // polygon.
        for (int i = 0; i < nXSize + 2; ++i)
        {
            paoLastLineArm[i].poPolyInside = oPolygonizer.getTheOuterPolygon();
        }
    }

    /* ==================================================================== */
    /*      Second pass during which we will actually collect polygon       */
    /*      edges as geometries.                                            */
    /* ==================================================================== */
    // One iteration more than there are raster lines: the extra one
    // (iY == nYSize) feeds a line made only of the outer polygon id.
    for (int iY = 0; eErr == CE_None && iY < nYSize + 1; iY++)
    {
        /* ---------------------------------------------------------------- */
        /*      Read the image data.                                        */
        /* ---------------------------------------------------------------- */
        if (iY < nYSize)
        {
            eErr = GDALRasterIO(hSrcBand, GF_Read, 0, iY, nXSize, 1,
                                panThisLineVal, nXSize, 1, eDT, 0, 0);
            if (eErr == CE_None && hMaskBand != nullptr)
                eErr = GPMaskImageData(hMaskBand, pabyMaskLine, iY, nXSize,
                                       panThisLineVal);
        }

        if (eErr != CE_None)
            break;

        /* ---------------------------------------------------------------- */
        /*      Determine what polygon the various pixels belong to         */
        /*      (redoing the same thing done in the first pass above).      */
        /* ---------------------------------------------------------------- */
        if (iY == nYSize)
        {
            for (int iX = 0; iX < nXSize; iX++)
                panThisLineId[iX] =
                    decltype(oPolygonizer)::THE_OUTER_POLYGON_ID;
        }
        else if (iY == 0)
        {
            eErr = oSecondEnum.ProcessLine(nullptr, panThisLineVal, nullptr,
                                           panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;
        }
        else
        {
            eErr = oSecondEnum.ProcessLine(panLastLineVal, panThisLineVal,
                                           panLastLineId, panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;
        }

        if (eErr != CE_None)
            break;

        /* ---------------------------------------------------------------- */
        /*      Hand the ids of this line to the polygonizer.               */
        /* ---------------------------------------------------------------- */
        // For a real raster line the second pass ids are first translated to
        // the final ids of the first pass; panLastLineId is no longer needed
        // at this point and is reused as the destination, so that
        // panThisLineId keeps the untranslated ids the enumerator needs for
        // the next line. The closing line is passed as is.
        const GInt32 *panIdsForPolygonizer = panThisLineId;
        if (iY < nYSize)
        {
            for (int iX = 0; iX < nXSize; iX++)
            {
                // TODO: maybe we can reserve -1 as the lookup result for -1 polygon id in the panPolyIdMap,
                //       so the this expression becomes: panLastLineId[iX] = *(oFirstEnum.panPolyIdMap + panThisLineId[iX]).
                //       This would eliminate the condition checking.
                panLastLineId[iX] =
                    panThisLineId[iX] == -1
                        ? -1
                        : oFirstEnum.panPolyIdMap[panThisLineId[iX]];
            }
            panIdsForPolygonizer = panLastLineId;
        }

        if (!oPolygonizer.processLine(panIdsForPolygonizer, panLastLineVal,
                                      paoThisLineArm, paoLastLineArm, iY,
                                      nXSize))
        {
            eErr = CE_Failure;
        }
        else
        {
            // Surface any error recorded by the polygon writer.
            eErr = oPolygonWriter.getErr();
        }

        if (eErr != CE_None)
            break;

        /* ---------------------------------------------------------------- */
        /*      Swap pixel value, and polygon id lines to be ready for the  */
        /*      next line.                                                  */
        /* ---------------------------------------------------------------- */
        std::swap(panLastLineVal, panThisLineVal);
        std::swap(panLastLineId, panThisLineId);
        std::swap(paoThisLineArm, paoLastLineArm);

        /* ---------------------------------------------------------------- */
        /*      Report progress, and support interrupts.                    */
        /* ---------------------------------------------------------------- */
        // This loop runs nYSize + 1 times, so the plain ratio would exceed
        // 1.0 on the closing iteration and would divide by zero for an
        // empty raster: clamp the reported value to 1.0.
        const double dfComplete =
            nYSize > 0
                ? std::min(1.0, 0.10 + 0.90 * ((iY + 1) /
                                               static_cast<double>(nYSize)))
                : 1.0;
        if (!pfnProgress(dfComplete, "", pProgressArg))
        {
            CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
            eErr = CE_Failure;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Cleanup                                                         */
    /* -------------------------------------------------------------------- */
    CPLFree(panThisLineId);
    CPLFree(panLastLineId);
    CPLFree(panThisLineVal);
    CPLFree(panLastLineVal);
    CPLFree(paoThisLineArm);
    CPLFree(paoLastLineArm);
    CPLFree(pabyMaskLine);

    return eErr;
}

/******************************************************************************/
/*                          GDALFloatEquals()                                 */
/* Code from:                                                                 */
/* http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm  */
/******************************************************************************/
// Returns TRUE when A and B are at most MAX_ULPS representable floats apart
// (i.e. up to MAX_ULPS-1 floats may lie between them), FALSE otherwise.
GBool GDALFloatEquals(float A, float B)
{
    // This function will allow maxUlps-1 floats between A and B.
    const int maxUlps = MAX_ULPS;

    // Make sure maxUlps is non-negative and small enough that the default NAN
    // won't compare as equal to anything.
#if MAX_ULPS <= 0 || MAX_ULPS >= 4 * 1024 * 1024
#error "Invalid MAX_ULPS"
#endif

    // Reading the float bits through a pointer cast could violate strict
    // aliasing (it causes a warning with gcc -O2), so memcpy is used.
    // Credits for Even Rouault. Further info at
    // http://trac.osgeo.org/gdal/ticket/4005#comment:6
    int aInt = 0;
    memcpy(&aInt, &A, 4);

    // Make aInt lexicographically ordered as a twos-complement int.
    if (aInt < 0)
        aInt = INT_MIN - aInt;

    // Make bInt lexicographically ordered as a twos-complement int.
    int bInt = 0;
    memcpy(&bInt, &B, 4);

    if (bInt < 0)
        bInt = INT_MIN - bInt;

    // Both the subtraction and the comparison are done on 64 bits. The
    // difference of two 32 bit values can be as large as 2^32 - 1 in
    // magnitude, so narrowing it back to int before taking the absolute value
    // would wrap for operands of opposite sign: 2.0f and -2.0f, for instance,
    // differ by exactly 2^31, which narrows to INT_MIN, whose abs() is
    // undefined and would typically be seen as "smaller than maxUlps".
    // Staying on 64 bits also keeps -ftrapv and conversion checkers quiet.
    const GIntBig nDiff =
        static_cast<GIntBig>(aInt) - static_cast<GIntBig>(bInt);
    const GIntBig nAbsDiff = nDiff < 0 ? -nDiff : nDiff;

    return nAbsDiff <= maxUlps;
}

/************************************************************************/
/*                           GDALPolygonize()                           */
/************************************************************************/

/**
 * Create polygon coverage from raster data.
 *
 * This function creates vector polygons for all connected regions of pixels in
 * the raster sharing a common pixel value.  Optionally each polygon may be
 * labeled with the pixel value in an attribute.  Optionally a mask band
 * can be provided to determine which pixels are eligible for processing.
 *
 * Note that currently the source pixel band values are read into a
 * signed 64bit integer buffer (Int64), so floating point or complex
 * bands will be implicitly truncated before processing. If you want to use a
 * version using 32bit float buffers, see GDALFPolygonize().
 *
 * Polygon features will be created on the output layer, with polygon
 * geometries representing the polygons.  The polygon geometries will be
 * in the georeferenced coordinate system of the image (based on the
 * geotransform of the source dataset).  It is acceptable for the output
 * layer to already have features.  Note that GDALPolygonize() does not
 * set the coordinate system on the output layer.  Application code should
 * do this when the layer is created, presumably matching the raster
 * coordinate system.
 *
 * The algorithm used attempts to minimize memory use so that very large
 * rasters can be processed.  However, if the raster has many polygons
 * or very large/complex polygons, the memory use for holding polygon
 * enumerations and active polygon geometries may grow to be quite large.
 *
 * The algorithm will generally produce very dense polygon geometries, with
 * edges that follow exactly on pixel boundaries for all non-interior pixels.
 * For non-thematic raster data (such as satellite images) the result will
 * essentially be one small polygon per pixel, and memory and output layer
 * sizes will be substantial.  The algorithm is primarily intended for
 * relatively simple thematic imagery, masks, and classification results.
 *
 * @param hSrcBand the source raster band to be processed.
 * @param hMaskBand an optional mask band.  All pixels in the mask band with a
 * value other than zero will be considered suitable for collection as
 * polygons.
 * @param hOutLayer the vector feature layer to which the polygons should
 * be written.
 * @param iPixValField the attribute field index indicating the feature
 * attribute into which the pixel value of the polygon should be written. Or
 * -1 to indicate that the pixel value must not be written.
 * @param papszOptions a name/value list of additional options
 * <ul>
 * <li>8CONNECTED=8: May be set to "8" to use 8 connectedness.
 * Otherwise 4 connectedness will be applied to the algorithm. Note that only
 * the presence of the option is tested: any value enables 8 connectedness.</li>
 * <li>DATASET_FOR_GEOREF=dataset_name: Name of a dataset from which to read
 * the geotransform. This is useful if hSrcBand has no related dataset, which
 * is typical for mask bands.</li>
 * </ul>
 * @param pfnProgress callback for reporting algorithm progress matching the
 * GDALProgressFunc() semantics.  May be NULL.
 * @param pProgressArg callback argument passed to pfnProgress.
 *
 * @return CE_None on success or CE_Failure on a failure.
 */

CPLErr CPL_STDCALL GDALPolygonize(GDALRasterBandH hSrcBand,
                                  GDALRasterBandH hMaskBand,
                                  OGRLayerH hOutLayer, int iPixValField,
                                  char **papszOptions,
                                  GDALProgressFunc pfnProgress,
                                  void *pProgressArg)

{
    // Integer flavour of the polygonizer: scanlines are read as signed
    // 64 bit integers and compared with IntEqualityTest.
    using PixelType = std::int64_t;
    const GDALDataType eBufferType = GDT_Int64;

    return GDALPolygonizeT<PixelType, IntEqualityTest>(
        hSrcBand, hMaskBand, hOutLayer, iPixValField, papszOptions,
        pfnProgress, pProgressArg, eBufferType);
}

/************************************************************************/
/*                           GDALFPolygonize()                           */
/************************************************************************/

/**
 * Create polygon coverage from raster data.
 *
 * This function creates vector polygons for all connected regions of pixels in
 * the raster sharing a common pixel value.  Optionally each polygon may be
 * labeled with the pixel value in an attribute.  Optionally a mask band
 * can be provided to determine which pixels are eligible for processing.
 *
 * The source pixel band values are read into a 32bit float buffer. If you want
 * to use a (probably faster) version using a signed 64bit integer buffer, see
 * GDALPolygonize().
 *
 * Polygon features will be created on the output layer, with polygon
 * geometries representing the polygons.  The polygon geometries will be
 * in the georeferenced coordinate system of the image (based on the
 * geotransform of the source dataset).  It is acceptable for the output
 * layer to already have features.  Note that GDALFPolygonize() does not
 * set the coordinate system on the output layer.  Application code should
 * do this when the layer is created, presumably matching the raster
 * coordinate system.
 *
 * The algorithm used attempts to minimize memory use so that very large
 * rasters can be processed.  However, if the raster has many polygons
 * or very large/complex polygons, the memory use for holding polygon
 * enumerations and active polygon geometries may grow to be quite large.
 *
 * The algorithm will generally produce very dense polygon geometries, with
 * edges that follow exactly on pixel boundaries for all non-interior pixels.
 * For non-thematic raster data (such as satellite images) the result will
 * essentially be one small polygon per pixel, and memory and output layer
 * sizes will be substantial.  The algorithm is primarily intended for
 * relatively simple thematic imagery, masks, and classification results.
 *
 * @param hSrcBand the source raster band to be processed.
 * @param hMaskBand an optional mask band.  All pixels in the mask band with a
 * value other than zero will be considered suitable for collection as
 * polygons.
 * @param hOutLayer the vector feature layer to which the polygons should
 * be written.
 * @param iPixValField the attribute field index indicating the feature
 * attribute into which the pixel value of the polygon should be written. Or
 * -1 to indicate that the pixel value must not be written.
 * @param papszOptions a name/value list of additional options
 * <ul>
 * <li>8CONNECTED=8: May be set to "8" to use 8 connectedness.
 * Otherwise 4 connectedness will be applied to the algorithm. Note that only
 * the presence of the option is tested: any value enables 8 connectedness.</li>
 * <li>DATASET_FOR_GEOREF=dataset_name: Name of a dataset from which to read
 * the geotransform. This is useful if hSrcBand has no related dataset, which
 * is typical for mask bands.</li>
 * </ul>
 * @param pfnProgress callback for reporting algorithm progress matching the
 * GDALProgressFunc() semantics.  May be NULL.
 * @param pProgressArg callback argument passed to pfnProgress.
 *
 * @return CE_None on success or CE_Failure on a failure.
 *
 * @since GDAL 1.9.0
 */

CPLErr CPL_STDCALL GDALFPolygonize(GDALRasterBandH hSrcBand,
                                   GDALRasterBandH hMaskBand,
                                   OGRLayerH hOutLayer, int iPixValField,
                                   char **papszOptions,
                                   GDALProgressFunc pfnProgress,
                                   void *pProgressArg)

{
    // Floating point flavour of the polygonizer: scanlines are read as
    // 32 bit floats and compared with FloatEqualityTest.
    using PixelType = float;
    const GDALDataType eBufferType = GDT_Float32;

    return GDALPolygonizeT<PixelType, FloatEqualityTest>(
        hSrcBand, hMaskBand, hOutLayer, iPixValField, papszOptions,
        pfnProgress, pProgressArg, eBufferType);
}