ADBC: add spatial support for DuckDB databases and GeoParquet

- Automate loading duckdb_spatial extension when installed, and when
  the dataset is DuckDB or Parquet
- Retrieve geometries (GEOMETRY type) as OGR geometries
- Read GeoParquet metadata to figure out spatial extent, CRS and
  geometry type per geometry column
- Use duckdb_spatial ST_Intersects() for faster spatial filtering
  (when done with OGRLayer::SetSpatialFilter()), potentially
  leveraging DuckDB RTree when it is available.
- Use GeoParquet bounding box column in complement to above
- Pass-through forwarding of WHERE clauses expressed through
  OGRLayer::SetAttributeFilter()
This commit is contained in:
Even Rouault 2024-12-07 17:53:40 +01:00
parent 04b227e386
commit e7332ae72c
No known key found for this signature in database
GPG Key ID: 33EBBFC47B3DD87D
9 changed files with 1053 additions and 58 deletions

View File

@ -342,6 +342,7 @@ jobs:
# For cache
mkdir -p .gdal
mkdir -p .duckdb
docker run \
-e CI \
@ -355,6 +356,7 @@ jobs:
--add-host=host.docker.internal:host-gateway \
--rm \
-v $(pwd)/.gdal:/.gdal \
-v $(pwd)/.duckdb:/.duckdb \
-v $(pwd):$(pwd) \
--workdir $(pwd)/build-${{ matrix.id }} \
${CONTAINER_NAME_FULL} \

View File

@ -4057,14 +4057,16 @@ static int64_t CountFeaturesUsingArrowStream(OGRLayer *poLayer,
if (nExpectedFID >= 0 && !bExpectedFIDFound)
{
bOK = false;
printf("ERROR: expected to find feature of id %" PRId64
printf("ERROR: CountFeaturesUsingArrowStream() :"
"expected to find feature of id %" PRId64
", but did not get it\n",
nExpectedFID);
}
if (nUnexpectedFID >= 0 && bUnexpectedFIDFound)
{
bOK = false;
printf("ERROR: expected *not* to find feature of id %" PRId64
printf("ERROR: CountFeaturesUsingArrowStream(): "
"expected *not* to find feature of id %" PRId64
", but did get it\n",
nUnexpectedFID);
}

View File

@ -231,29 +231,97 @@ def test_ogr_adbc_duckdb_parquet_with_sql_open_option():
###############################################################################
def test_ogr_adbc_duckdb_parquet_with_spatial():
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
def test_ogr_adbc_duckdb_parquet_with_spatial(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
if not _has_libduckdb():
pytest.skip("libduckdb.so missing")
if gdaltest.is_travis_branch("ubuntu_2404"):
# Works locally for me when replicating the Dockerfile ...
pytest.skip("fails on ubuntu_2404 for unknown reason")
with gdal.config_option(
"OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL
):
with gdal.OpenEx(
"data/parquet/poly.parquet",
gdal.OF_VECTOR,
allowed_drivers=["ADBC"],
open_options=[
"PRELUDE_STATEMENTS=INSTALL spatial",
]
if OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL == "ON"
else [],
) as ds:
lyr = ds.GetLayer(0)
assert lyr.GetGeomType() == ogr.wkbPolygon
assert lyr.TestCapability(ogr.OLCFastGetExtent)
assert lyr.TestCapability(ogr.OLCFastSpatialFilter)
minx, maxx, miny, maxy = lyr.GetExtent()
assert (minx, maxx, miny, maxy) == (
478315.53125,
481645.3125,
4762880.5,
4765610.5,
)
assert lyr.GetExtent3D() == (
478315.53125,
481645.3125,
4762880.5,
4765610.5,
float("inf"),
float("-inf"),
)
assert lyr.GetSpatialRef().GetAuthorityCode(None) == "27700"
f = lyr.GetNextFeature()
assert f.GetGeometryRef().ExportToWkt().startswith("POLYGON ((")
with gdal.OpenEx(
"data/parquet/poly.parquet",
gdal.OF_VECTOR,
allowed_drivers=["ADBC"],
open_options=[
"PRELUDE_STATEMENTS=INSTALL spatial",
"PRELUDE_STATEMENTS=LOAD spatial",
],
) as ds:
assert lyr.GetFeatureCount() == 10
lyr.SetAttributeFilter("false")
assert lyr.GetFeatureCount() == 0
lyr.SetAttributeFilter("true")
lyr.SetAttributeFilter(None)
assert lyr.GetFeatureCount() == 10
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
assert lyr.GetFeatureCount() == 10
lyr.SetSpatialFilterRect(minx, miny, minx, maxy)
assert lyr.GetFeatureCount() < 10
lyr.SetSpatialFilterRect(maxx, miny, maxx, maxy)
assert lyr.GetFeatureCount() < 10
lyr.SetSpatialFilterRect(minx, miny, maxx, miny)
assert lyr.GetFeatureCount() < 10
lyr.SetSpatialFilterRect(minx, maxy, maxx, maxy)
assert lyr.GetFeatureCount() < 10
lyr.SetAttributeFilter("true")
lyr.SetSpatialFilter(None)
assert lyr.GetFeatureCount() == 10
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
assert lyr.GetFeatureCount() == 10
lyr.SetAttributeFilter("false")
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
assert lyr.GetFeatureCount() == 0
###############################################################################
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
def test_ogr_adbc_duckdb_with_spatial_index(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
if not _has_libduckdb():
pytest.skip("libduckdb.so missing")
with gdal.config_option(
"OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL
):
ds = ogr.Open("data/duckdb/poly_with_spatial_index.duckdb")
lyr = ds.GetLayer(0)
with ds.ExecuteSQL(
"SELECT ST_AsText(geometry) FROM read_parquet('data/parquet/poly.parquet')"
"SELECT 1 FROM duckdb_extensions() WHERE extension_name='spatial' AND loaded = true"
) as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f.GetField(0).startswith("POLYGON")
spatial_loaded = sql_lyr.GetNextFeature() is not None
assert lyr.TestCapability(ogr.OLCFastSpatialFilter) == spatial_loaded
###############################################################################
@ -325,6 +393,30 @@ def test_ogr_adbc_test_ogrsf_parquet_filename_with_glob():
assert "ERROR" not in ret
###############################################################################
# Run test_ogrsf on a GeoParquet file
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
def test_ogr_adbc_test_ogrsf_geoparquet(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
    """Run the test_ogrsf utility, read-only, on the poly.parquet dataset
    opened through the ADBC driver, once per value of the
    OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL configuration option."""

    duckdb_available = _has_libduckdb()
    if not duckdb_available:
        pytest.skip("libduckdb.so missing")

    import test_cli_utilities

    # Skip when the test_ogrsf binary cannot be located
    if test_cli_utilities.get_test_ogrsf_path() is None:
        pytest.skip()

    command = (
        test_cli_utilities.get_test_ogrsf_path()
        + f" -ro ADBC:data/parquet/poly.parquet --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}"
    )
    output = gdaltest.runexternal(command)

    # The utility output must contain INFO lines and no ERROR line
    assert "INFO" in output
    assert "ERROR" not in output
###############################################################################
# Test DATETIME_AS_STRING=YES GetArrowStream() option
@ -359,7 +451,8 @@ def test_ogr_adbc_arrow_stream_numpy_datetime_as_string(tmp_vsimem):
# Run test_ogrsf on a DuckDB dataset
def test_ogr_adbc_test_ogrsf_duckdb():
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
def test_ogr_adbc_test_ogrsf_duckdb(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
if not _has_libduckdb():
pytest.skip("libduckdb.so missing")
@ -370,7 +463,34 @@ def test_ogr_adbc_test_ogrsf_duckdb():
pytest.skip()
ret = gdaltest.runexternal(
test_cli_utilities.get_test_ogrsf_path() + " -ro ADBC:data/duckdb/poly.duckdb"
test_cli_utilities.get_test_ogrsf_path()
+ f" -ro ADBC:data/duckdb/poly.duckdb --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}"
)
assert "INFO" in ret
assert "ERROR" not in ret
###############################################################################
# Run test_ogrsf on a DuckDB dataset
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
def test_ogr_adbc_test_ogrsf_duckdb_with_spatial_index(
OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL,
):
if not _has_libduckdb():
pytest.skip("libduckdb.so missing")
import test_cli_utilities
if test_cli_utilities.get_test_ogrsf_path() is None:
pytest.skip()
ret = gdaltest.runexternal(
test_cli_utilities.get_test_ogrsf_path()
+ f" -ro ADBC:data/duckdb/poly_with_spatial_index.duckdb --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}"
)
assert "INFO" in ret

View File

@ -25,7 +25,11 @@ Consult the `installation instruction <https://arrow.apache.org/adbc/current/dri
for the various ADBC drivers. At time of writing, there are drivers for
SQLite3, PostgreSQL, Snowflake, BigQuery, DuckDB, Flight SQL, etc.
The driver is read-only, and there is no support for spatial data currently.
The driver is read-only.
There is spatial support when the underlying ADBC driver is DuckDB, for
native spatial DuckDB databases and GeoParquet datasets, and when the spatial
extension is installed.
Connection string
-----------------
@ -106,6 +110,15 @@ GDAL ADBC driver as a way of locating and loading the ADBC driver if GDAL was
not built with ADBC Driver Manager support or if an embedding application has
an updated or augmented collection of drivers available.
Filtering
---------
Attribute filters are passed to the underlying ADBC engine.
Spatial filters are passed to DuckDB when it is the underlying ADBC engine,
for DuckDB spatial databases and GeoParquet datasets. The GeoParquet bounding
box column and/or DuckDB native RTree spatial indices are used when available.
Examples
--------

View File

@ -87,9 +87,24 @@ class OGRADBCDataset;
class OGRADBCLayer final : public OGRLayer,
public OGRGetNextFeatureThroughRaw<OGRADBCLayer>
{
public:
//! Describe the bbox column of a geometry column
// Each member holds the SQL expression designating one corner coordinate of
// the covering bounding box. All members are left empty when the geometry
// column has no associated bbox column.
struct GeomColBBOX
{
std::string osXMin{}; // empty if no bbox column
std::string osYMin{}; // expression for the minimum Y value
std::string osXMax{}; // expression for the maximum X value
std::string osYMax{}; // expression for the maximum Y value
};
private:
friend class OGRADBCDataset;
OGRADBCDataset *m_poDS = nullptr;
const std::string m_osBaseStatement{}; // as provided by user
std::string m_osModifiedBaseStatement{}; // above tuned to use ST_AsWKB()
std::string m_osModifiedSelect{}; // SELECT part of above
std::string m_osAttributeFilter{};
std::unique_ptr<AdbcStatement> m_statement{};
std::unique_ptr<OGRArrowArrayToOGRFeatureAdapterLayer> m_poAdapterLayer{};
std::unique_ptr<OGRArrowArrayStream> m_stream{};
@ -103,17 +118,27 @@ class OGRADBCLayer final : public OGRLayer,
GIntBig m_nFeatureID = 0;
bool m_bIsParquetLayer = false;
std::vector<GeomColBBOX>
m_geomColBBOX{}; // same size as GetGeomFieldCount()
std::vector<OGREnvelope3D> m_extents{}; // same size as GetGeomFieldCount()
OGRFeature *GetNextRawFeature();
bool GetArrowStreamInternal(struct ArrowArrayStream *out_stream);
GIntBig GetFeatureCountParquet();
void BuildLayerDefn(bool bInternalUse);
bool ReplaceStatement(const char *pszNewStatement);
bool UpdateStatement();
std::string GetCurrentStatement() const;
CPL_DISALLOW_COPY_ASSIGN(OGRADBCLayer)
public:
OGRADBCLayer(OGRADBCDataset *poDS, const char *pszName,
const char *pszStatement,
std::unique_ptr<AdbcStatement> poStatement,
std::unique_ptr<OGRArrowArrayStream> poStream,
ArrowSchema *schema);
ArrowSchema *schema, bool bInternalUse);
~OGRADBCLayer() override;
OGRFeatureDefn *GetLayerDefn() override
@ -128,6 +153,20 @@ class OGRADBCLayer final : public OGRLayer,
bool GetArrowStream(struct ArrowArrayStream *out_stream,
CSLConstList papszOptions = nullptr) override;
GIntBig GetFeatureCount(int bForce) override;
//! Set the spatial filter on the first geometry field (index 0), by
//! forwarding to the overload that takes a geometry field index.
void SetSpatialFilter(OGRGeometry *poGeom) override
{
SetSpatialFilter(0, poGeom);
}
OGRErr SetAttributeFilter(const char *pszFilter) override;
void SetSpatialFilter(int iGeomField, OGRGeometry *poGeom) override;
OGRErr GetExtent(OGREnvelope *psExtent, int bForce = TRUE) override;
OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent,
int bForce = TRUE) override;
OGRErr GetExtent3D(int iGeomField, OGREnvelope3D *psExtent,
int bForce = TRUE) override;
};
/************************************************************************/
@ -143,6 +182,8 @@ class OGRADBCDataset final : public GDALDataset
std::unique_ptr<AdbcConnection> m_connection{};
std::vector<std::unique_ptr<OGRLayer>> m_apoLayers{};
std::string m_osParquetFilename{};
bool m_bIsDuckDB = false;
bool m_bSpatialLoaded = false;
public:
OGRADBCDataset() = default;
@ -164,7 +205,13 @@ class OGRADBCDataset final : public GDALDataset
OGRLayer *GetLayerByName(const char *pszName) override;
std::unique_ptr<OGRADBCLayer> CreateLayer(const char *pszStatement,
const char *pszLayerName);
const char *pszLayerName,
bool bInternalUse);
//! Create a layer for a statement issued by the driver itself.
//! Forwards to CreateLayer() with the layer name "temp" and
//! bInternalUse = true.
std::unique_ptr<OGRADBCLayer> CreateInternalLayer(const char *pszStatement)
{
return CreateLayer(pszStatement, "temp", true);
}
OGRLayer *ExecuteSQL(const char *pszStatement, OGRGeometry *poSpatialFilter,
const char *pszDialect) override;

View File

@ -15,6 +15,7 @@
#include "ogradbcdrivercore.h"
#include "ogr_mem.h"
#include "ogr_p.h"
#include "cpl_error.h"
#include "cpl_json.h"
#include "gdal_adbc.h"
@ -79,7 +80,8 @@ OGRADBCDataset::~OGRADBCDataset()
/************************************************************************/
std::unique_ptr<OGRADBCLayer>
OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName)
OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName,
bool bInternalUse)
{
OGRADBCError error;
@ -164,7 +166,8 @@ OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName)
}
return std::make_unique<OGRADBCLayer>(
this, pszLayerName, std::move(statement), std::move(stream), &schema);
this, pszLayerName, osStatement.c_str(), std::move(statement),
std::move(stream), &schema, bInternalUse);
}
/************************************************************************/
@ -181,7 +184,7 @@ OGRLayer *OGRADBCDataset::ExecuteSQL(const char *pszStatement,
pszDialect);
}
auto poLayer = CreateLayer(pszStatement, "RESULTSET");
auto poLayer = CreateLayer(pszStatement, "RESULTSET", false);
if (poLayer && poSpatialFilter)
{
if (poLayer->GetGeomType() == wkbNone)
@ -211,7 +214,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
}
const char *pszADBCDriverName =
CSLFetchNameValue(poOpenInfo->papszOpenOptions, "ADBC_DRIVER");
const bool bIsDuckDB = OGRADBCDriverIsDuckDB(poOpenInfo);
m_bIsDuckDB = OGRADBCDriverIsDuckDB(poOpenInfo);
const bool bIsSQLite3 =
(pszADBCDriverName && EQUAL(pszADBCDriverName, "adbc_driver_sqlite")) ||
OGRADBCDriverIsSQLite3(poOpenInfo);
@ -221,7 +224,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
if (!pszADBCDriverName)
{
if (bIsDuckDB || bIsParquet)
if (m_bIsDuckDB || bIsParquet)
{
pszADBCDriverName =
#ifdef _WIN32
@ -249,7 +252,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
// Load the driver
if (pszADBCDriverName &&
(bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
(m_bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
{
if (OGRADBCLoadDriver(pszADBCDriverName, "duckdb_adbc_init", &m_driver,
error) != ADBC_STATUS_OK)
@ -280,7 +283,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
// Set options
if (pszADBCDriverName &&
(bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
(m_bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
{
if (ADBC_CALL(DatabaseSetOption, &m_database, "path",
bIsParquet ? ":memory:" : pszFilename,
@ -346,9 +349,30 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
for (const char *pszStatement :
cpl::Iterate(CSLConstList(papszPreludeStatements)))
{
CreateLayer(pszStatement, "temp");
CreateInternalLayer(pszStatement);
}
CSLDestroy(papszPreludeStatements);
if ((bIsParquet || m_bIsDuckDB) &&
CPLTestBool(
CPLGetConfigOption("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", "ON")))
{
auto poTmpLayer =
CreateInternalLayer("SELECT 1 FROM duckdb_extensions() WHERE "
"extension_name='spatial' AND loaded = false");
if (poTmpLayer && std::unique_ptr<OGRFeature>(
poTmpLayer->GetNextFeature()) != nullptr)
{
CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
CreateInternalLayer("LOAD spatial");
}
poTmpLayer =
CreateInternalLayer("SELECT 1 FROM duckdb_extensions() WHERE "
"extension_name='spatial' AND loaded = true");
m_bSpatialLoaded =
poTmpLayer && std::unique_ptr<OGRFeature>(
poTmpLayer->GetNextFeature()) != nullptr;
}
std::string osLayerName = "RESULTSET";
std::string osSQL;
@ -374,18 +398,89 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
{
if (pszSQL[0])
{
auto poLayer = CreateLayer(pszSQL, osLayerName.c_str());
std::unique_ptr<OGRADBCLayer> poLayer;
if ((bIsParquet || m_bIsDuckDB) && m_bSpatialLoaded)
{
std::string osErrorMsg;
{
CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
poLayer = CreateLayer(pszSQL, osLayerName.c_str(), false);
if (!poLayer)
osErrorMsg = CPLGetLastErrorMsg();
}
if (!poLayer)
{
CPLDebug("ADBC",
"Connecting with 'LOAD spatial' did not work "
"(%s). Retrying without it",
osErrorMsg.c_str());
ADBC_CALL(ConnectionRelease, m_connection.get(), error);
m_connection.reset();
ADBC_CALL(DatabaseRelease, &m_database, error);
memset(&m_database, 0, sizeof(m_database));
if (ADBC_CALL(DatabaseNew, &m_database, error) !=
ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined,
"AdbcDatabaseNew() failed: %s",
error.message());
return false;
}
if (ADBC_CALL(DatabaseSetOption, &m_database, "path",
":memory:", error) != ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined,
"AdbcDatabaseSetOption() failed: %s",
error.message());
return false;
}
if (ADBC_CALL(DatabaseInit, &m_database, error) !=
ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined,
"AdbcDatabaseInit() failed: %s",
error.message());
return false;
}
m_connection = std::make_unique<AdbcConnection>();
if (ADBC_CALL(ConnectionNew, m_connection.get(), error) !=
ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined,
"AdbcConnectionNew() failed: %s",
error.message());
return false;
}
if (ADBC_CALL(ConnectionInit, m_connection.get(),
&m_database, error) != ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined,
"AdbcConnectionInit() failed: %s",
error.message());
return false;
}
}
}
if (!poLayer)
return false;
{
poLayer = CreateLayer(pszSQL, osLayerName.c_str(), false);
if (!poLayer)
return false;
}
poLayer->m_bIsParquetLayer = bIsParquetLayer;
m_apoLayers.emplace_back(std::move(poLayer));
}
}
else if (bIsDuckDB || bIsSQLite3)
else if (m_bIsDuckDB || bIsSQLite3)
{
auto poLayerList = CreateLayer(
"SELECT name FROM sqlite_master WHERE type IN ('table', 'view')",
"LAYERLIST");
auto poLayerList = CreateInternalLayer(
"SELECT name FROM sqlite_master WHERE type IN ('table', 'view')");
if (!poLayerList || poLayerList->GetLayerDefn()->GetFieldCount() != 1)
{
return false;
@ -400,7 +495,8 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
CPLSPrintf("SELECT * FROM \"%s\"",
OGRDuplicateCharacter(pszLayerName, '"').c_str());
CPLTurnFailureIntoWarning(true);
auto poLayer = CreateLayer(osStatement.c_str(), pszLayerName);
auto poLayer =
CreateLayer(osStatement.c_str(), pszLayerName, false);
CPLTurnFailureIntoWarning(false);
if (poLayer)
{
@ -410,13 +506,12 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
}
else if (bIsPostgreSQL)
{
auto poLayerList = CreateLayer(
auto poLayerList = CreateInternalLayer(
"SELECT n.nspname, c.relname FROM pg_class c "
"JOIN pg_namespace n ON c.relnamespace = n.oid "
"AND c.relkind in ('r','v','m','f') "
"AND n.nspname NOT IN ('pg_catalog', 'information_schema') "
"ORDER BY c.oid",
"LAYERLIST");
"ORDER BY c.oid");
if (!poLayerList || poLayerList->GetLayerDefn()->GetFieldCount() != 2)
{
return false;
@ -432,9 +527,9 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
OGRDuplicateCharacter(pszTableName, '"').c_str());
CPLTurnFailureIntoWarning(true);
auto poLayer =
CreateLayer(osStatement.c_str(),
CPLSPrintf("%s.%s", pszNamespace, pszTableName));
auto poLayer = CreateLayer(
osStatement.c_str(),
CPLSPrintf("%s.%s", pszNamespace, pszTableName), false);
CPLTurnFailureIntoWarning(false);
if (poLayer)
{
@ -474,8 +569,9 @@ OGRLayer *OGRADBCDataset::GetLayerByName(const char *pszName)
}
auto statement = std::make_unique<AdbcStatement>();
OGRADBCLayer tmpLayer(this, "", std::move(statement),
std::move(objectsStream), &schema);
OGRADBCLayer tmpLayer(this, "", "", std::move(statement),
std::move(objectsStream), &schema,
/* bInternalUse = */ true);
const auto tmpLayerDefn = tmpLayer.GetLayerDefn();
if (tmpLayerDefn->GetFieldIndex("catalog_name") < 0 ||
tmpLayerDefn->GetFieldIndex("catalog_db_schemas") < 0)

View File

@ -12,31 +12,434 @@
****************************************************************************/
#include "ogr_adbc.h"
#include "ogr_spatialref.h"
#include "ogr_p.h"
#include "cpl_json.h"
#include <cmath>
#include <limits>
#include <map>
#include <set>
#define ADBC_CALL(func, ...) m_poDS->m_driver.func(__VA_ARGS__)
/************************************************************************/
/* GetGeometryTypeFromString() */
/************************************************************************/
//! Translate a geometry type name into an OGR geometry type.
//! The name is handed to OGRReadWKTGeometryType(). When the result is still
//! wkbUnknown for a non-empty name, a debug message is emitted.
static OGRwkbGeometryType GetGeometryTypeFromString(const std::string &osType)
{
    OGRwkbGeometryType eParsedType = wkbUnknown;
    OGRReadWKTGeometryType(osType.c_str(), &eParsedType);

    const bool bUnrecognized = !osType.empty() && eParsedType == wkbUnknown;
    if (bUnrecognized)
        CPLDebug("ADBC", "Unknown geometry type: %s", osType.c_str());

    return eParsedType;
}
/************************************************************************/
/* OGRADBCLayer() */
/************************************************************************/
// Construct a layer bound to dataset poDS, using pszName as its description.
// The statement and stream objects are moved into the layer.
OGRADBCLayer::OGRADBCLayer(OGRADBCDataset *poDS, const char *pszName,
const char *pszStatement,
std::unique_ptr<AdbcStatement> poStatement,
std::unique_ptr<OGRArrowArrayStream> poStream,
ArrowSchema *schema)
: m_poDS(poDS), m_statement(std::move(poStatement)),
m_stream(std::move(poStream))
ArrowSchema *schema, bool bInternalUse)
: m_poDS(poDS), m_osBaseStatement(pszStatement),
m_osModifiedBaseStatement(m_osBaseStatement),
m_statement(std::move(poStatement)), m_stream(std::move(poStream))
{
SetDescription(pszName);
// Take over the content of the caller's schema: copy the structure, then
// clear the caller's release callback so that only m_schema releases it.
memcpy(&m_schema, schema, sizeof(m_schema));
schema->release = nullptr;
m_poAdapterLayer =
std::make_unique<OGRArrowArrayToOGRFeatureAdapterLayer>(pszName);
// Derive the layer definition (fields and geometry fields) from the schema
BuildLayerDefn(bInternalUse);
}
/************************************************************************/
/* ParseGeometryColumnCovering() */
/************************************************************************/
//! Parse bounding box column definition
static bool ParseGeometryColumnCovering(const CPLJSONObject &oJSONDef,
std::string &osBBOXColumn,
std::string &osXMin,
std::string &osYMin,
std::string &osXMax,
std::string &osYMax)
{
const auto oCovering = oJSONDef["covering"];
if (oCovering.IsValid() &&
oCovering.GetType() == CPLJSONObject::Type::Object)
{
const auto oBBOX = oCovering["bbox"];
if (oBBOX.IsValid() && oBBOX.GetType() == CPLJSONObject::Type::Object)
{
const auto oXMin = oBBOX["xmin"];
const auto oYMin = oBBOX["ymin"];
const auto oXMax = oBBOX["xmax"];
const auto oYMax = oBBOX["ymax"];
if (oXMin.IsValid() && oYMin.IsValid() && oXMax.IsValid() &&
oYMax.IsValid() &&
oXMin.GetType() == CPLJSONObject::Type::Array &&
oYMin.GetType() == CPLJSONObject::Type::Array &&
oXMax.GetType() == CPLJSONObject::Type::Array &&
oYMax.GetType() == CPLJSONObject::Type::Array)
{
const auto osXMinArray = oXMin.ToArray();
const auto osYMinArray = oYMin.ToArray();
const auto osXMaxArray = oXMax.ToArray();
const auto osYMaxArray = oYMax.ToArray();
if (osXMinArray.Size() == 2 && osYMinArray.Size() == 2 &&
osXMaxArray.Size() == 2 && osYMaxArray.Size() == 2 &&
osXMinArray[0].GetType() == CPLJSONObject::Type::String &&
osXMinArray[1].GetType() == CPLJSONObject::Type::String &&
osYMinArray[0].GetType() == CPLJSONObject::Type::String &&
osYMinArray[1].GetType() == CPLJSONObject::Type::String &&
osXMaxArray[0].GetType() == CPLJSONObject::Type::String &&
osXMaxArray[1].GetType() == CPLJSONObject::Type::String &&
osYMaxArray[0].GetType() == CPLJSONObject::Type::String &&
osYMaxArray[1].GetType() == CPLJSONObject::Type::String &&
osXMinArray[0].ToString() == osYMinArray[0].ToString() &&
osXMinArray[0].ToString() == osXMaxArray[0].ToString() &&
osXMinArray[0].ToString() == osYMaxArray[0].ToString())
{
osBBOXColumn = osXMinArray[0].ToString();
osXMin = osXMinArray[1].ToString();
osYMin = osYMinArray[1].ToString();
osXMax = osXMaxArray[1].ToString();
osYMax = osYMaxArray[1].ToString();
return true;
}
}
}
}
return false;
}
/************************************************************************/
/* ParseGeoParquetColumn() */
/************************************************************************/
/** Collect the description of one geometry column of the GeoParquet 'geo'
 * metadata into the maps supplied by the caller. All maps are keyed by the
 * name of the column (oColumn.GetName()).
 *
 * @param oColumn  JSON object describing the geometry column.
 * @param oMapType  Receives the geometry type deduced from "geometry_types"
 *                  (wkbUnknown when it cannot be narrowed down). Always set.
 * @param oMapExtent  Receives the extent read from "bbox" when it has 4 or 6
 *                    elements and MinX <= MaxX. Left without an entry
 *                    otherwise.
 * @param oMapGeomColumnToCoveringBBOXColumn  Receives the quoted SQL
 *                    expressions of the covering bounding box members, when
 *                    a valid "covering" definition is present.
 * @param oMapGeomColumnsFromGeoParquet  Receives the SRS of the column, or a
 *                    null pointer when no usable CRS can be determined.
 *                    Always set, so that the column is known as a geometry
 *                    column.
 * @param oSetCoveringBBoxColumn  Receives the name of the covering bounding
 *                    box column, when there is one.
 */
static void ParseGeoParquetColumn(
    const CPLJSONObject &oColumn,
    std::map<std::string, OGRwkbGeometryType> &oMapType,
    std::map<std::string, OGREnvelope3D> &oMapExtent,
    std::map<std::string, OGRADBCLayer::GeomColBBOX>
        &oMapGeomColumnToCoveringBBOXColumn,
    std::map<std::string, std::unique_ptr<OGRSpatialReference>>
        &oMapGeomColumnsFromGeoParquet,
    std::set<std::string> &oSetCoveringBBoxColumn)
{
    const std::string osColumnName = oColumn.GetName();

    auto oCrs = oColumn.GetObj("crs");
    if (!oCrs.IsValid())
    {
        // WGS 84 is implied if no crs member is found.
        auto poSRS = std::make_unique<OGRSpatialReference>();
        poSRS->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
        poSRS->importFromEPSG(4326);
        oMapGeomColumnsFromGeoParquet[osColumnName] = std::move(poSRS);
    }
    else
    {
        // A "crs" member is present. Whatever happens below, the column is
        // registered (possibly with a null SRS): a CRS that cannot be
        // interpreted must not prevent the column from being recognized as
        // a geometry column.
        std::unique_ptr<OGRSpatialReference> poSRS;
        if (oCrs.GetType() == CPLJSONObject::Type::Object)
        {
            // CRS encoded as PROJJSON (extension)
            const auto oType = oCrs["type"];
            if (oType.IsValid() &&
                oType.GetType() == CPLJSONObject::Type::String &&
                oType.ToString().find("CRS") != std::string::npos)
            {
                auto poCandidateSRS = std::make_unique<OGRSpatialReference>();
                poCandidateSRS->SetAxisMappingStrategy(
                    OAMS_TRADITIONAL_GIS_ORDER);
                if (poCandidateSRS->SetFromUserInput(
                        oCrs.ToString().c_str()) == OGRERR_NONE)
                {
                    poSRS = std::move(poCandidateSRS);
                }
            }
        }
        oMapGeomColumnsFromGeoParquet[osColumnName] = std::move(poSRS);
    }

    OGRwkbGeometryType eGeomType = wkbUnknown;
    auto oType = oColumn.GetObj("geometry_types");
    if (oType.GetType() == CPLJSONObject::Type::Array)
    {
        const auto oTypeArray = oType.ToArray();
        if (oTypeArray.Size() == 1)
        {
            eGeomType = GetGeometryTypeFromString(oTypeArray[0].ToString());
        }
        else if (oTypeArray.Size() > 1)
        {
            // Several types are declared: a single layer geometry type is
            // only reported when all of them belong to the same
            // (Multi)LineString or (Multi)Polygon family.
            const auto PromoteToCollection = [](OGRwkbGeometryType eType)
            {
                if (eType == wkbPoint)
                    return wkbMultiPoint;
                if (eType == wkbLineString)
                    return wkbMultiLineString;
                if (eType == wkbPolygon)
                    return wkbMultiPolygon;
                return eType;
            };
            bool bMixed = false;
            bool bHasMulti = false;
            bool bHasZ = false;
            bool bHasM = false;
            const auto eFirstType = OGR_GT_Flatten(
                GetGeometryTypeFromString(oTypeArray[0].ToString()));
            const auto eFirstTypeCollection = PromoteToCollection(eFirstType);
            for (int i = 0; i < oTypeArray.Size(); ++i)
            {
                const auto eThisGeom =
                    GetGeometryTypeFromString(oTypeArray[i].ToString());
                if (PromoteToCollection(OGR_GT_Flatten(eThisGeom)) !=
                    eFirstTypeCollection)
                {
                    bMixed = true;
                    break;
                }
                bHasZ |= OGR_GT_HasZ(eThisGeom) != FALSE;
                bHasM |= OGR_GT_HasM(eThisGeom) != FALSE;
                // A type that is its own promotion is already a collection
                bHasMulti |= (PromoteToCollection(OGR_GT_Flatten(eThisGeom)) ==
                              OGR_GT_Flatten(eThisGeom));
            }
            if (!bMixed)
            {
                if (eFirstTypeCollection == wkbMultiPolygon ||
                    eFirstTypeCollection == wkbMultiLineString)
                {
                    if (bHasMulti)
                        eGeomType = OGR_GT_SetModifier(eFirstTypeCollection,
                                                       bHasZ, bHasM);
                    else
                        eGeomType =
                            OGR_GT_SetModifier(eFirstType, bHasZ, bHasM);
                }
            }
        }
    }

    oMapType[osColumnName] = eGeomType;

    OGREnvelope3D sExtent;
    const auto oBBox = oColumn.GetArray("bbox");
    if (oBBox.IsValid() && oBBox.Size() == 4)
    {
        // 2D bounding box: xmin, ymin, xmax, ymax. The Z range is set to
        // (+inf, -inf).
        sExtent.MinX = oBBox[0].ToDouble();
        sExtent.MinY = oBBox[1].ToDouble();
        sExtent.MinZ = std::numeric_limits<double>::infinity();
        sExtent.MaxX = oBBox[2].ToDouble();
        sExtent.MaxY = oBBox[3].ToDouble();
        sExtent.MaxZ = -std::numeric_limits<double>::infinity();
        if (sExtent.MinX <= sExtent.MaxX)
        {
            oMapExtent[osColumnName] = sExtent;
        }
    }
    else if (oBBox.IsValid() && oBBox.Size() == 6)
    {
        // 3D bounding box: xmin, ymin, zmin, xmax, ymax, zmax
        sExtent.MinX = oBBox[0].ToDouble();
        sExtent.MinY = oBBox[1].ToDouble();
        sExtent.MinZ = oBBox[2].ToDouble();
        sExtent.MaxX = oBBox[3].ToDouble();
        sExtent.MaxY = oBBox[4].ToDouble();
        sExtent.MaxZ = oBBox[5].ToDouble();
        if (sExtent.MinX <= sExtent.MaxX)
        {
            oMapExtent[osColumnName] = sExtent;
        }
    }

    std::string osBBOXColumn;
    std::string osXMin, osYMin, osXMax, osYMax;
    if (ParseGeometryColumnCovering(oColumn, osBBOXColumn, osXMin, osYMin,
                                    osXMax, osYMax))
    {
        // Build double-quoted "bbox_column"."member" SQL expressions, with
        // embedded double quotes doubled.
        OGRADBCLayer::GeomColBBOX geomColBBOX;
        const std::string osPrefix =
            std::string("\"")
                .append(OGRDuplicateCharacter(osBBOXColumn, '"'))
                .append("\".\"");

        geomColBBOX.osXMin = std::string(osPrefix)
                                 .append(OGRDuplicateCharacter(osXMin, '"'))
                                 .append("\"");
        geomColBBOX.osYMin = std::string(osPrefix)
                                 .append(OGRDuplicateCharacter(osYMin, '"'))
                                 .append("\"");
        geomColBBOX.osXMax = std::string(osPrefix)
                                 .append(OGRDuplicateCharacter(osXMax, '"'))
                                 .append("\"");
        geomColBBOX.osYMax = std::string(osPrefix)
                                 .append(OGRDuplicateCharacter(osYMax, '"'))
                                 .append("\"");
        oMapGeomColumnToCoveringBBOXColumn[osColumnName] = geomColBBOX;
        oSetCoveringBBoxColumn.insert(osBBOXColumn);
    }
}
/************************************************************************/
/* BuildLayerDefn() */
/************************************************************************/
// Build the layer definition (attribute and geometry fields) from m_schema.
//
// When bInternalUse is false and the base statement starts with "SELECT "
// on a DuckDB dataset, or selects from the Parquet file of the dataset, this
// method also:
// - reads the GeoParquet 'geo' metadata of the Parquet file, if any,
// - runs "DESCRIBE <base statement>" to list the columns,
// - builds a replacement statement that omits covering bounding box columns
//   and wraps GEOMETRY columns in ST_AsWKB() when the spatial extension is
//   loaded, and installs it with ReplaceStatement().
void OGRADBCLayer::BuildLayerDefn(bool bInternalUse)
{
// Identify geometry columns for Parquet files, and query them with
// ST_AsWKB() to avoid getting duckdb_spatial own geometry encoding
// (https://github.com/duckdb/duckdb_spatial/blob/a60aa3733741a99c49baaf33390c0f7c8a9598a3/spatial/src/spatial/core/geometry/geometry_serialization.cpp#L11)
// Per geometry column name: SRS (may be null), geometry type, extent and
// covering bounding box expressions.
std::map<std::string, std::unique_ptr<OGRSpatialReference>> oMapGeomColumns;
std::map<std::string, OGRwkbGeometryType> oMapType;
std::map<std::string, OGREnvelope3D> oMapExtent;
std::map<std::string, GeomColBBOX> oMapGeomColumnToCoveringBBOXColumn;
if (!bInternalUse && STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT ") &&
(m_poDS->m_bIsDuckDB ||
(!m_poDS->m_osParquetFilename.empty() &&
CPLString(m_osBaseStatement)
.ifind(std::string(" FROM '").append(OGRDuplicateCharacter(
m_poDS->m_osParquetFilename, '\''))) !=
std::string::npos)))
{
// Try to read GeoParquet 'geo' metadata
std::map<std::string, std::unique_ptr<OGRSpatialReference>>
oMapGeomColumnsFromGeoParquet;
std::set<std::string> oSetCoveringBBoxColumn;
std::string osGeoParquetMD;
if (!m_poDS->m_osParquetFilename.empty())
{
// Single quotes of the filename are doubled before it is embedded in
// the SQL string literal.
auto poMetadataLayer = m_poDS->CreateInternalLayer(
std::string("SELECT value FROM parquet_kv_metadata('")
.append(OGRDuplicateCharacter(m_poDS->m_osParquetFilename,
'\''))
.append("') WHERE key = 'geo'")
.c_str());
if (poMetadataLayer)
{
auto f = std::unique_ptr<OGRFeature>(
poMetadataLayer->GetNextFeature());
if (f)
{
// The metadata value is read as binary and copied into a string
int nBytes = 0;
const GByte *pabyData = f->GetFieldAsBinary(0, &nBytes);
osGeoParquetMD.assign(
reinterpret_cast<const char *>(pabyData), nBytes);
// CPLDebug("ADBC", "%s", osGeoParquetMD.c_str());
}
}
}
CPLJSONDocument oDoc;
if (!osGeoParquetMD.empty() && oDoc.LoadMemory(osGeoParquetMD))
{
const auto oColums = oDoc.GetRoot().GetObj("columns");
for (const auto &oColumn : oColums.GetChildren())
{
// Only columns declared with the WKB encoding are considered
if (oColumn.GetString("encoding") == "WKB")
{
ParseGeoParquetColumn(oColumn, oMapType, oMapExtent,
oMapGeomColumnToCoveringBBOXColumn,
oMapGeomColumnsFromGeoParquet,
oSetCoveringBBoxColumn);
}
}
}
// List the columns returned by the base statement
auto poDescribeLayer = m_poDS->CreateInternalLayer(
std::string("DESCRIBE ").append(m_osBaseStatement).c_str());
std::string osNewStatement;
// Set to true as soon as the rebuilt statement differs from a plain
// selection of all columns.
bool bNewStatement = false;
if (poDescribeLayer &&
// cppcheck-suppress knownConditionTrueFalse
(m_poDS->m_bIsDuckDB || !oMapGeomColumnsFromGeoParquet.empty()))
{
for (auto &&f : *poDescribeLayer)
{
const char *pszColName = f->GetFieldAsString("column_name");
// Covering bounding box columns are left out of the new statement
if (cpl::contains(oSetCoveringBBoxColumn, pszColName))
{
bNewStatement = true;
continue;
}
// f->DumpReadable(stdout);
if (osNewStatement.empty())
osNewStatement = "SELECT ";
else
osNewStatement += ", ";
// Columns described by the GeoParquet metadata are geometry columns
auto oIter = oMapGeomColumnsFromGeoParquet.find(pszColName);
if (oIter != oMapGeomColumnsFromGeoParquet.end())
{
oMapGeomColumns[pszColName] = std::move(oIter->second);
}
if (EQUAL(f->GetFieldAsString("column_type"), "GEOMETRY") &&
m_poDS->m_bSpatialLoaded)
{
// Emit: ST_AsWKB("col") AS "col"
bNewStatement = true;
osNewStatement += "ST_AsWKB(\"";
osNewStatement += OGRDuplicateCharacter(pszColName, '"');
osNewStatement += "\") AS ";
// GEOMETRY column without GeoParquet metadata: no SRS
if (oIter == oMapGeomColumnsFromGeoParquet.end())
oMapGeomColumns[pszColName] = nullptr;
}
osNewStatement += '"';
osNewStatement += OGRDuplicateCharacter(pszColName, '"');
osNewStatement += '"';
}
// Keep the SELECT part alone, then wrap the base statement as a subquery
m_osModifiedSelect = osNewStatement;
osNewStatement += " FROM (";
osNewStatement += m_osBaseStatement;
osNewStatement += " )";
}
if (bNewStatement)
{
// CPLDebug("ADBC", "%s -> %s", m_osBaseStatement.c_str(), osNewStatement.c_str());
if (ReplaceStatement(osNewStatement.c_str()))
{
m_osModifiedBaseStatement = osNewStatement;
}
else
{
// The rewritten statement could not be installed: forget the modified
// SELECT and the detected geometry columns.
m_osModifiedSelect.clear();
oMapGeomColumns.clear();
}
}
}
m_poAdapterLayer = std::make_unique<OGRArrowArrayToOGRFeatureAdapterLayer>(
GetDescription());
for (int i = 0; i < m_schema.n_children; ++i)
{
// NOTE(review): this unconditional CreateFieldFromArrowSchema() call comes
// before the geometry/attribute dispatch below, whose else branch calls it
// again for the same child — confirm whether both calls are intended.
m_poAdapterLayer->CreateFieldFromArrowSchema(m_schema.children[i]);
const char *pszColName = m_schema.children[i]->name;
auto oIter = oMapGeomColumns.find(pszColName);
if (oIter != oMapGeomColumns.end())
{
// operator[] on the maps below inserts a default-constructed value when
// the column has no entry.
OGRGeomFieldDefn oGeomFieldDefn(pszColName, oMapType[pszColName]);
auto poSRS = std::move(oIter->second).release();
if (poSRS)
{
oGeomFieldDefn.SetSpatialRef(poSRS);
poSRS->Release();
}
m_poAdapterLayer->m_poLayerDefn->AddGeomFieldDefn(&oGeomFieldDefn);
// One entry per geometry field is appended to m_extents and m_geomColBBOX
m_extents.push_back(oMapExtent[pszColName]);
m_geomColBBOX.push_back(
oMapGeomColumnToCoveringBBOXColumn[pszColName]);
}
else
{
m_poAdapterLayer->CreateFieldFromArrowSchema(m_schema.children[i]);
}
}
}
@ -53,6 +456,67 @@ OGRADBCLayer::~OGRADBCLayer()
m_schema.release(&m_schema);
}
/************************************************************************/
/* ReplaceStatement() */
/************************************************************************/
bool OGRADBCLayer::ReplaceStatement(const char *pszNewStatement)
{
// CPLDebug("ADBC", "%s", pszNewStatement);
OGRADBCError error;
auto statement = std::make_unique<AdbcStatement>();
if (ADBC_CALL(StatementNew, m_poDS->m_connection.get(), statement.get(),
error) != ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined, "AdbcStatementNew() failed: %s",
error.message());
ADBC_CALL(StatementRelease, statement.get(), error);
}
else if (ADBC_CALL(StatementSetSqlQuery, statement.get(), pszNewStatement,
error) != ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined,
"AdbcStatementSetSqlQuery() failed: %s", error.message());
error.clear();
ADBC_CALL(StatementRelease, statement.get(), error);
}
else
{
auto stream = std::make_unique<OGRArrowArrayStream>();
int64_t rows_affected = -1;
ArrowSchema newSchema;
memset(&newSchema, 0, sizeof(newSchema));
if (ADBC_CALL(StatementExecuteQuery, statement.get(), stream->get(),
&rows_affected, error) != ADBC_STATUS_OK)
{
CPLError(CE_Failure, CPLE_AppDefined,
"AdbcStatementExecuteQuery() failed: %s", error.message());
error.clear();
ADBC_CALL(StatementRelease, statement.get(), error);
}
else if (stream->get_schema(&newSchema) != 0)
{
CPLError(CE_Failure, CPLE_AppDefined, "get_schema() failed");
ADBC_CALL(StatementRelease, statement.get(), error);
}
else
{
if (m_schema.release)
m_schema.release(&m_schema);
memcpy(&m_schema, &newSchema, sizeof(newSchema));
if (m_statement)
ADBC_CALL(StatementRelease, m_statement.get(), error);
m_statement = std::move(statement);
m_stream = std::move(stream);
return true;
}
}
return false;
}
/************************************************************************/
/* GetNextRawFeature() */
/************************************************************************/
@ -99,6 +563,16 @@ OGRFeature *OGRADBCLayer::GetNextRawFeature()
}
auto poFeature = m_poAdapterLayer->m_apoFeatures[m_nIdx++].release();
const int nGeomFieldCount =
m_poAdapterLayer->m_poLayerDefn->GetFieldCount();
for (int i = 0; i < nGeomFieldCount; ++i)
{
auto poGeom = poFeature->GetGeomFieldRef(i);
if (poGeom)
poGeom->assignSpatialReference(
m_poAdapterLayer->m_poLayerDefn->GetGeomFieldDefn(i)
->GetSpatialRef());
}
poFeature->SetFID(m_nFeatureID++);
return poFeature;
}
@ -119,6 +593,194 @@ void OGRADBCLayer::ResetReading()
}
}
/************************************************************************/
/* GetExtent() */
/************************************************************************/
// Convenience overload: returns the extent of the geometry field of index 0
// by forwarding to the per-geometry-field overload.
OGRErr OGRADBCLayer::GetExtent(OGREnvelope *psExtent, int bForce)
{
    constexpr int iFirstGeomField = 0;
    return GetExtent(iFirstGeomField, psExtent, bForce);
}
/************************************************************************/
/* GetExtent() */
/************************************************************************/
// Returns the 2D extent of geometry field iGeomField.
//
// The extent cached in m_extents is used when it is initialized; otherwise
// the computation is delegated to GetExtentInternal(). An out-of-range index
// yields OGRERR_FAILURE, and an error is only reported when that index is
// not 0.
OGRErr OGRADBCLayer::GetExtent(int iGeomField, OGREnvelope *psExtent,
                               int bForce)
{
    const bool bIndexInRange =
        iGeomField >= 0 && iGeomField < GetLayerDefn()->GetGeomFieldCount();
    if (!bIndexInRange)
    {
        if (iGeomField != 0)
        {
            CPLError(CE_Failure, CPLE_AppDefined,
                     "Invalid geometry field index : %d", iGeomField);
        }
        return OGRERR_FAILURE;
    }

    // Try the cached extent first.
    *psExtent = m_extents[iGeomField];
    return psExtent->IsInit()
               ? OGRERR_NONE
               : GetExtentInternal(iGeomField, psExtent, bForce);
}
/************************************************************************/
/* GetExtent3D() */
/************************************************************************/
// Returns the 3D extent of geometry field iGeomField.
//
// The extent cached in m_extents is used when it is initialized. Otherwise
// the computation is delegated to the generic OGRLayer::GetExtent3D()
// implementation. An out-of-range index yields OGRERR_FAILURE, and an error
// is only reported when that index is not 0.
OGRErr OGRADBCLayer::GetExtent3D(int iGeomField, OGREnvelope3D *psExtent,
                                 int bForce)
{
    if (iGeomField < 0 || iGeomField >= GetLayerDefn()->GetGeomFieldCount())
    {
        if (iGeomField != 0)
        {
            CPLError(CE_Failure, CPLE_AppDefined,
                     "Invalid geometry field index : %d", iGeomField);
        }
        return OGRERR_FAILURE;
    }

    *psExtent = m_extents[iGeomField];
    if (psExtent->IsInit())
        return OGRERR_NONE;

    // Do not fall back on GetExtentInternal() here: it is the helper used by
    // the 2D GetExtent() and would receive this envelope through its 2D base
    // type, so the Z bounds would never be computed. The base class 3D
    // implementation fills all three dimensions.
    return OGRLayer::GetExtent3D(iGeomField, psExtent, bForce);
}
/************************************************************************/
/* GetCurrentStatement() */
/************************************************************************/
// Builds the SQL statement matching the currently installed attribute and
// spatial filters.
//
// When the base statement is of the form "SELECT * FROM ..." and a rewritten
// column list (m_osModifiedSelect) is available, the filters are turned into
// a WHERE clause wrapped around the base statement:
//   - a comparison against the bounding box columns, when they are known
//     for the filtered geometry field,
//   - an ST_Intersects() test, when the spatial extension is loaded,
//   - the attribute filter, passed through between parentheses.
// In every other case m_osModifiedBaseStatement is returned unchanged.
std::string OGRADBCLayer::GetCurrentStatement() const
{
    const bool bCanWrapBaseStatement =
        !m_osModifiedSelect.empty() &&
        STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ");
    if (!bCanWrapBaseStatement)
        return m_osModifiedBaseStatement;

    const bool bHasAttrFilter = !m_osAttributeFilter.empty();
    const bool bHasGeomFilter = m_poFilterGeom != nullptr;
    const bool bUseBBOXColumns =
        bHasGeomFilter && !m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty();
    const bool bUseSTIntersects = bHasGeomFilter && m_poDS->m_bSpatialLoaded;
    if (!bHasAttrFilter && !bUseBBOXColumns && !bUseSTIntersects)
        return m_osModifiedBaseStatement;

    std::string osSQL(m_osModifiedSelect);
    osSQL += " FROM (";
    osSQL += m_osBaseStatement;
    osSQL += ") WHERE ";

    // Emits the " AND " separator before every clause but the first one.
    bool bNeedsAnd = false;
    const auto BeginClause = [&osSQL, &bNeedsAnd]()
    {
        if (bNeedsAnd)
            osSQL += " AND ";
        bNeedsAnd = true;
    };

    if (bHasGeomFilter)
    {
        // Infinite bounds are replaced by the largest finite double before
        // being formatted into the SQL text.
        constexpr double dfLargest = std::numeric_limits<double>::max();
        const auto ReplaceInf = [](double dfValue, double dfIfInfinite)
        { return std::isinf(dfValue) ? dfIfInfinite : dfValue; };
        const double dfXMin = ReplaceInf(m_sFilterEnvelope.MinX, -dfLargest);
        const double dfYMin = ReplaceInf(m_sFilterEnvelope.MinY, -dfLargest);
        const double dfXMax = ReplaceInf(m_sFilterEnvelope.MaxX, dfLargest);
        const double dfYMax = ReplaceInf(m_sFilterEnvelope.MaxY, dfLargest);

        if (bUseBBOXColumns)
        {
            // Bounding box overlap test on the bounding box columns.
            const auto &sBBOXCols = m_geomColBBOX[m_iGeomFieldFilter];
            BeginClause();
            osSQL += sBBOXCols.osXMin;
            osSQL += " <= ";
            osSQL += CPLSPrintf("%.17g", dfXMax);
            osSQL += " AND ";
            osSQL += sBBOXCols.osXMax;
            osSQL += " >= ";
            osSQL += CPLSPrintf("%.17g", dfXMin);
            osSQL += " AND ";
            osSQL += sBBOXCols.osYMin;
            osSQL += " <= ";
            osSQL += CPLSPrintf("%.17g", dfYMax);
            osSQL += " AND ";
            osSQL += sBBOXCols.osYMax;
            osSQL += " >= ";
            osSQL += CPLSPrintf("%.17g", dfYMin);
        }

        if (bUseSTIntersects)
        {
            BeginClause();
            const char *pszGeomColName =
                m_poAdapterLayer->m_poLayerDefn
                    ->GetGeomFieldDefn(m_iGeomFieldFilter)
                    ->GetNameRef();
            osSQL += "ST_Intersects(\"";
            // Double any double quote so the column name stays a valid
            // quoted identifier.
            osSQL += OGRDuplicateCharacter(pszGeomColName, '"');
            osSQL += CPLSPrintf(
                "\", ST_MakeEnvelope(%.17g,%.17g,%.17g,%.17g))", dfXMin,
                dfYMin, dfXMax, dfYMax);
        }
    }

    if (bHasAttrFilter)
    {
        BeginClause();
        osSQL += "(";
        osSQL += m_osAttributeFilter;
        osSQL += ")";
    }

    return osSQL;
}
/************************************************************************/
/* UpdateStatement() */
/************************************************************************/
// Rebuilds the SQL statement from the current filters and installs it as the
// layer's active statement. Returns the result of ReplaceStatement().
bool OGRADBCLayer::UpdateStatement()
{
    const std::string osStatement = GetCurrentStatement();
    return ReplaceStatement(osStatement.c_str());
}
/***********************************************************************/
/* SetAttributeFilter() */
/***********************************************************************/
// Installs an attribute filter.
//
// When the base statement is a "SELECT * FROM ..." for which a rewritten
// column list exists, the filter text is stored and forwarded as-is into the
// SQL statement (a null pszFilter clears it). Otherwise the generic OGRLayer
// implementation is used.
OGRErr OGRADBCLayer::SetAttributeFilter(const char *pszFilter)
{
    const bool bForwardToSQL =
        !m_osModifiedSelect.empty() &&
        STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ");
    if (!bForwardToSQL)
        return OGRLayer::SetAttributeFilter(pszFilter);

    if (pszFilter)
        m_osAttributeFilter = pszFilter;
    else
        m_osAttributeFilter = "";

    if (!UpdateStatement())
        return OGRERR_FAILURE;
    return OGRERR_NONE;
}
/************************************************************************/
/* SetSpatialFilter() */
/************************************************************************/
// Installs a spatial filter on geometry field iGeomField and refreshes the
// SQL statement accordingly. Nothing happens when the index is rejected by
// ValidateGeometryFieldIndexForSetSpatialFilter() or is not lower than the
// number of geometry fields.
void OGRADBCLayer::SetSpatialFilter(int iGeomField, OGRGeometry *poGeomIn)
{
    if (!ValidateGeometryFieldIndexForSetSpatialFilter(iGeomField, poGeomIn))
        return;

    if (iGeomField >= GetLayerDefn()->GetGeomFieldCount())
        return;

    m_iGeomFieldFilter = iGeomField;

    // Only restart reading when InstallFilter() reports a change.
    const auto bFilterChanged = InstallFilter(poGeomIn);
    if (bFilterChanged)
        ResetReading();

    // The statement is rebuilt in every case.
    UpdateStatement();
}
/************************************************************************/
/* TestCapability() */
/************************************************************************/
@ -127,16 +789,46 @@ int OGRADBCLayer::TestCapability(const char *pszCap)
{
if (EQUAL(pszCap, OLCFastGetArrowStream))
{
return !m_poFilterGeom && !m_poAttrQuery;
return !m_poFilterGeom && !m_poAttrQuery && m_osAttributeFilter.empty();
}
else if (EQUAL(pszCap, OLCFastFeatureCount))
{
return !m_poFilterGeom && !m_poAttrQuery && m_bIsParquetLayer;
return !m_poFilterGeom && !m_poAttrQuery &&
m_osAttributeFilter.empty() && m_bIsParquetLayer;
}
else
else if (EQUAL(pszCap, OLCFastGetExtent))
{
return false;
return !m_extents.empty() && m_extents[0].IsInit();
}
else if (EQUAL(pszCap, OLCFastSpatialFilter) && m_iGeomFieldFilter >= 0 &&
m_iGeomFieldFilter < GetLayerDefn()->GetGeomFieldCount())
{
if (m_poDS->m_bSpatialLoaded && m_poDS->m_bIsDuckDB)
{
const char *pszGeomColName =
m_poAdapterLayer->m_poLayerDefn
->GetGeomFieldDefn(m_iGeomFieldFilter)
->GetNameRef();
auto poTmpLayer = m_poDS->CreateInternalLayer(CPLSPrintf(
"SELECT 1 FROM sqlite_master WHERE tbl_name = '%s' AND type = "
"'index' AND (sql LIKE '%%USING RTREE (%s)%%' OR sql LIKE "
"'%%USING RTREE (\"%s\")%%')",
OGRDuplicateCharacter(GetDescription(), '\'').c_str(),
pszGeomColName,
OGRDuplicateCharacter(pszGeomColName, '"').c_str()));
return poTmpLayer &&
std::unique_ptr<OGRFeature>(poTmpLayer->GetNextFeature());
}
else if (!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty())
{
// Let's assume that the presence of a geometry bounding box
// column is sufficient enough to pretend to have fast spatial
// filter capabilities
return true;
}
}
return false;
}
/************************************************************************/
@ -196,8 +888,30 @@ bool OGRADBCLayer::GetArrowStreamInternal(struct ArrowArrayStream *out_stream)
GIntBig OGRADBCLayer::GetFeatureCount(int bForce)
{
if (m_poFilterGeom || m_poAttrQuery)
if (m_poFilterGeom || m_poAttrQuery || !m_osAttributeFilter.empty())
{
if (!m_osModifiedSelect.empty() &&
STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ") &&
(!m_poFilterGeom ||
!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty() ||
m_poDS->m_bSpatialLoaded))
{
const std::string osCurStatement = GetCurrentStatement();
auto poCountLayer = m_poDS->CreateInternalLayer(
std::string("SELECT COUNT(*) FROM (")
.append(osCurStatement)
.append(")")
.c_str());
if (poCountLayer &&
poCountLayer->GetLayerDefn()->GetFieldCount() == 1)
{
auto poFeature =
std::unique_ptr<OGRFeature>(poCountLayer->GetNextFeature());
if (poFeature)
return poFeature->GetFieldAsInteger64(0);
}
}
return OGRLayer::GetFeatureCount(bForce);
}
@ -249,7 +963,7 @@ GIntBig OGRADBCLayer::GetFeatureCountParquet()
const std::string osSQL(CPLSPrintf(
"SELECT CAST(SUM(num_rows) AS BIGINT) FROM parquet_file_metadata('%s')",
OGRDuplicateCharacter(m_poDS->m_osParquetFilename, '\'').c_str()));
auto poCountLayer = m_poDS->CreateLayer(osSQL.c_str(), "numrows");
auto poCountLayer = m_poDS->CreateInternalLayer(osSQL.c_str());
if (poCountLayer && poCountLayer->GetLayerDefn()->GetFieldCount() == 1)
{
auto poFeature =

View File

@ -650,6 +650,7 @@ constexpr static const char* const apszKnownConfigOptions[] =
"ODS_RESOLVE_FORMULAS", // from ogrodsdatasource.cpp
"OGR2OGR_MIN_FEATURES_FOR_THREADED_REPROJ", // from ogr2ogr_lib.cpp
"OGR2OGR_USE_ARROW_API", // from ogr2ogr_lib.cpp
"OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", // from ogradbcdataset.cpp
"OGR_API_SPY_FILE", // from ograpispy.cpp
"OGR_API_SPY_SNAPSHOT_PATH", // from ograpispy.cpp
"OGR_APPLY_GEOM_SET_PRECISION", // from ogr2ogr_lib.cpp, ogrlayer.cpp