ADBC: add spatial support for DuckDB databases and GeoParquet
- Automatically load the duckdb_spatial extension when it is installed and the dataset is DuckDB or Parquet - Retrieve geometries (GEOMETRY type) as OGR geometries - Read GeoParquet metadata to figure out the spatial extent, CRS and geometry type of each geometry column - Use duckdb_spatial ST_Intersects() for faster spatial filtering (when done with OGRLayer::SetSpatialFilter()), potentially leveraging the DuckDB RTree when it is available - Use the GeoParquet bounding box column as a complement to the above - Pass through WHERE clauses expressed through OGRLayer::SetAttributeFilter()
This commit is contained in:
parent
04b227e386
commit
e7332ae72c
|
@ -342,6 +342,7 @@ jobs:
|
|||
|
||||
# For cache
|
||||
mkdir -p .gdal
|
||||
mkdir -p .duckdb
|
||||
|
||||
docker run \
|
||||
-e CI \
|
||||
|
@ -355,6 +356,7 @@ jobs:
|
|||
--add-host=host.docker.internal:host-gateway \
|
||||
--rm \
|
||||
-v $(pwd)/.gdal:/.gdal \
|
||||
-v $(pwd)/.duckdb:/.duckdb \
|
||||
-v $(pwd):$(pwd) \
|
||||
--workdir $(pwd)/build-${{ matrix.id }} \
|
||||
${CONTAINER_NAME_FULL} \
|
||||
|
|
|
@ -4057,14 +4057,16 @@ static int64_t CountFeaturesUsingArrowStream(OGRLayer *poLayer,
|
|||
if (nExpectedFID >= 0 && !bExpectedFIDFound)
|
||||
{
|
||||
bOK = false;
|
||||
printf("ERROR: expected to find feature of id %" PRId64
|
||||
printf("ERROR: CountFeaturesUsingArrowStream() :"
|
||||
"expected to find feature of id %" PRId64
|
||||
", but did not get it\n",
|
||||
nExpectedFID);
|
||||
}
|
||||
if (nUnexpectedFID >= 0 && bUnexpectedFIDFound)
|
||||
{
|
||||
bOK = false;
|
||||
printf("ERROR: expected *not* to find feature of id %" PRId64
|
||||
printf("ERROR: CountFeaturesUsingArrowStream(): "
|
||||
"expected *not* to find feature of id %" PRId64
|
||||
", but did get it\n",
|
||||
nUnexpectedFID);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -231,29 +231,97 @@ def test_ogr_adbc_duckdb_parquet_with_sql_open_option():
|
|||
###############################################################################
|
||||
|
||||
|
||||
def test_ogr_adbc_duckdb_parquet_with_spatial():
|
||||
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
|
||||
def test_ogr_adbc_duckdb_parquet_with_spatial(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
|
||||
|
||||
if not _has_libduckdb():
|
||||
pytest.skip("libduckdb.so missing")
|
||||
|
||||
if gdaltest.is_travis_branch("ubuntu_2404"):
|
||||
# Works locally for me when replicating the Dockerfile ...
|
||||
pytest.skip("fails on ubuntu_2404 for unknown reason")
|
||||
with gdal.config_option(
|
||||
"OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL
|
||||
):
|
||||
with gdal.OpenEx(
|
||||
"data/parquet/poly.parquet",
|
||||
gdal.OF_VECTOR,
|
||||
allowed_drivers=["ADBC"],
|
||||
open_options=[
|
||||
"PRELUDE_STATEMENTS=INSTALL spatial",
|
||||
]
|
||||
if OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL == "ON"
|
||||
else [],
|
||||
) as ds:
|
||||
lyr = ds.GetLayer(0)
|
||||
assert lyr.GetGeomType() == ogr.wkbPolygon
|
||||
assert lyr.TestCapability(ogr.OLCFastGetExtent)
|
||||
assert lyr.TestCapability(ogr.OLCFastSpatialFilter)
|
||||
minx, maxx, miny, maxy = lyr.GetExtent()
|
||||
assert (minx, maxx, miny, maxy) == (
|
||||
478315.53125,
|
||||
481645.3125,
|
||||
4762880.5,
|
||||
4765610.5,
|
||||
)
|
||||
assert lyr.GetExtent3D() == (
|
||||
478315.53125,
|
||||
481645.3125,
|
||||
4762880.5,
|
||||
4765610.5,
|
||||
float("inf"),
|
||||
float("-inf"),
|
||||
)
|
||||
assert lyr.GetSpatialRef().GetAuthorityCode(None) == "27700"
|
||||
f = lyr.GetNextFeature()
|
||||
assert f.GetGeometryRef().ExportToWkt().startswith("POLYGON ((")
|
||||
|
||||
with gdal.OpenEx(
|
||||
"data/parquet/poly.parquet",
|
||||
gdal.OF_VECTOR,
|
||||
allowed_drivers=["ADBC"],
|
||||
open_options=[
|
||||
"PRELUDE_STATEMENTS=INSTALL spatial",
|
||||
"PRELUDE_STATEMENTS=LOAD spatial",
|
||||
],
|
||||
) as ds:
|
||||
assert lyr.GetFeatureCount() == 10
|
||||
lyr.SetAttributeFilter("false")
|
||||
|
||||
assert lyr.GetFeatureCount() == 0
|
||||
lyr.SetAttributeFilter("true")
|
||||
|
||||
lyr.SetAttributeFilter(None)
|
||||
assert lyr.GetFeatureCount() == 10
|
||||
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
|
||||
assert lyr.GetFeatureCount() == 10
|
||||
lyr.SetSpatialFilterRect(minx, miny, minx, maxy)
|
||||
assert lyr.GetFeatureCount() < 10
|
||||
lyr.SetSpatialFilterRect(maxx, miny, maxx, maxy)
|
||||
assert lyr.GetFeatureCount() < 10
|
||||
lyr.SetSpatialFilterRect(minx, miny, maxx, miny)
|
||||
assert lyr.GetFeatureCount() < 10
|
||||
lyr.SetSpatialFilterRect(minx, maxy, maxx, maxy)
|
||||
assert lyr.GetFeatureCount() < 10
|
||||
|
||||
lyr.SetAttributeFilter("true")
|
||||
lyr.SetSpatialFilter(None)
|
||||
assert lyr.GetFeatureCount() == 10
|
||||
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
|
||||
assert lyr.GetFeatureCount() == 10
|
||||
|
||||
lyr.SetAttributeFilter("false")
|
||||
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
|
||||
assert lyr.GetFeatureCount() == 0
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
||||
|
||||
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
|
||||
def test_ogr_adbc_duckdb_with_spatial_index(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
|
||||
|
||||
if not _has_libduckdb():
|
||||
pytest.skip("libduckdb.so missing")
|
||||
|
||||
with gdal.config_option(
|
||||
"OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL
|
||||
):
|
||||
ds = ogr.Open("data/duckdb/poly_with_spatial_index.duckdb")
|
||||
lyr = ds.GetLayer(0)
|
||||
with ds.ExecuteSQL(
|
||||
"SELECT ST_AsText(geometry) FROM read_parquet('data/parquet/poly.parquet')"
|
||||
"SELECT 1 FROM duckdb_extensions() WHERE extension_name='spatial' AND loaded = true"
|
||||
) as sql_lyr:
|
||||
f = sql_lyr.GetNextFeature()
|
||||
assert f.GetField(0).startswith("POLYGON")
|
||||
spatial_loaded = sql_lyr.GetNextFeature() is not None
|
||||
assert lyr.TestCapability(ogr.OLCFastSpatialFilter) == spatial_loaded
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
@ -325,6 +393,30 @@ def test_ogr_adbc_test_ogrsf_parquet_filename_with_glob():
|
|||
assert "ERROR" not in ret
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Run test_ogrsf on a GeoParquet file
|
||||
|
||||
|
||||
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
def test_ogr_adbc_test_ogrsf_geoparquet(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
    """Run the test_ogrsf utility in read-only mode on a GeoParquet file
    opened through the ADBC driver, once per value of the
    OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL configuration option.

    Skipped when libduckdb or the test_ogrsf binary is not available.
    """

    if not _has_libduckdb():
        pytest.skip("libduckdb.so missing")

    import test_cli_utilities

    test_ogrsf_path = test_cli_utilities.get_test_ogrsf_path()
    if test_ogrsf_path is None:
        pytest.skip()

    # The configuration option is forwarded on the command line so that it
    # applies to the external process.
    arguments = f" -ro ADBC:data/parquet/poly.parquet --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}"
    ret = gdaltest.runexternal(test_ogrsf_path + arguments)

    assert "INFO" in ret
    assert "ERROR" not in ret
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Test DATETIME_AS_STRING=YES GetArrowStream() option
|
||||
|
||||
|
@ -359,7 +451,8 @@ def test_ogr_adbc_arrow_stream_numpy_datetime_as_string(tmp_vsimem):
|
|||
# Run test_ogrsf on a DuckDB dataset
|
||||
|
||||
|
||||
def test_ogr_adbc_test_ogrsf_duckdb():
|
||||
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
|
||||
def test_ogr_adbc_test_ogrsf_duckdb(OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL):
|
||||
|
||||
if not _has_libduckdb():
|
||||
pytest.skip("libduckdb.so missing")
|
||||
|
@ -370,7 +463,34 @@ def test_ogr_adbc_test_ogrsf_duckdb():
|
|||
pytest.skip()
|
||||
|
||||
ret = gdaltest.runexternal(
|
||||
test_cli_utilities.get_test_ogrsf_path() + " -ro ADBC:data/duckdb/poly.duckdb"
|
||||
test_cli_utilities.get_test_ogrsf_path()
|
||||
+ f" -ro ADBC:data/duckdb/poly.duckdb --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}"
|
||||
)
|
||||
|
||||
assert "INFO" in ret
|
||||
assert "ERROR" not in ret
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Run test_ogrsf on a DuckDB dataset
|
||||
|
||||
|
||||
@pytest.mark.parametrize("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", ["ON", "OFF"])
|
||||
def test_ogr_adbc_test_ogrsf_duckdb_with_spatial_index(
|
||||
OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL,
|
||||
):
|
||||
|
||||
if not _has_libduckdb():
|
||||
pytest.skip("libduckdb.so missing")
|
||||
|
||||
import test_cli_utilities
|
||||
|
||||
if test_cli_utilities.get_test_ogrsf_path() is None:
|
||||
pytest.skip()
|
||||
|
||||
ret = gdaltest.runexternal(
|
||||
test_cli_utilities.get_test_ogrsf_path()
|
||||
+ f" -ro ADBC:data/duckdb/poly_with_spatial_index.duckdb --config OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL={OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL}"
|
||||
)
|
||||
|
||||
assert "INFO" in ret
|
||||
|
|
|
@ -25,7 +25,11 @@ Consult the `installation instruction <https://arrow.apache.org/adbc/current/dri
|
|||
for the various ADBC drivers. At time of writing, there are drivers for
|
||||
SQLite3, PostgreSQL, Snowflake, BigQuery, DuckDB, Flight SQL, etc.
|
||||
|
||||
The driver is read-only, and there is no support for spatial data currently.
|
||||
The driver is read-only.
|
||||
|
||||
There is spatial support when the underlying ADBC driver is DuckDB, for
|
||||
native spatial DuckDB databases and GeoParquet datasets, and when the spatial
|
||||
extension is installed.
|
||||
|
||||
Connection string
|
||||
-----------------
|
||||
|
@ -106,6 +110,15 @@ GDAL ADBC driver as a way of locating and loading the ADBC driver if GDAL was
|
|||
not built with ADBC Driver Manager support or if an embedding application has
|
||||
an updated or augmented collection of drivers available.
|
||||
|
||||
Filtering
|
||||
---------
|
||||
|
||||
Attribute filters are passed to the underlying ADBC engine.
|
||||
|
||||
Spatial filters are passed to DuckDB when it is the underlying ADBC engine
|
||||
and for DuckDB spatial databases and GeoParquet datasets. GeoParquet bounding
|
||||
box column and/or DuckDB native RTree spatial indices are used when available.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
|
|
|
@ -87,9 +87,24 @@ class OGRADBCDataset;
|
|||
class OGRADBCLayer final : public OGRLayer,
|
||||
public OGRGetNextFeatureThroughRaw<OGRADBCLayer>
|
||||
{
|
||||
public:
|
||||
/** Describe the bbox column of a geometry column.
 *
 * Each member holds the name of the column expression giving one bound of
 * the per-feature bounding box. All members default to an empty string;
 * an empty osXMin means that the geometry column has no bbox column.
 */
struct GeomColBBOX
{
    std::string osXMin{};  //!< minimum X (empty if no bbox column)
    std::string osYMin{};  //!< minimum Y
    std::string osXMax{};  //!< maximum X
    std::string osYMax{};  //!< maximum Y
};
|
||||
|
||||
private:
|
||||
friend class OGRADBCDataset;
|
||||
|
||||
OGRADBCDataset *m_poDS = nullptr;
|
||||
const std::string m_osBaseStatement{}; // as provided by user
|
||||
std::string m_osModifiedBaseStatement{}; // above tuned to use ST_AsWKB()
|
||||
std::string m_osModifiedSelect{}; // SELECT part of above
|
||||
std::string m_osAttributeFilter{};
|
||||
std::unique_ptr<AdbcStatement> m_statement{};
|
||||
std::unique_ptr<OGRArrowArrayToOGRFeatureAdapterLayer> m_poAdapterLayer{};
|
||||
std::unique_ptr<OGRArrowArrayStream> m_stream{};
|
||||
|
@ -103,17 +118,27 @@ class OGRADBCLayer final : public OGRLayer,
|
|||
GIntBig m_nFeatureID = 0;
|
||||
bool m_bIsParquetLayer = false;
|
||||
|
||||
std::vector<GeomColBBOX>
|
||||
m_geomColBBOX{}; // same size as GetGeomFieldCount()
|
||||
std::vector<OGREnvelope3D> m_extents{}; // same size as GetGeomFieldCount()
|
||||
|
||||
OGRFeature *GetNextRawFeature();
|
||||
bool GetArrowStreamInternal(struct ArrowArrayStream *out_stream);
|
||||
GIntBig GetFeatureCountParquet();
|
||||
|
||||
void BuildLayerDefn(bool bInternalUse);
|
||||
bool ReplaceStatement(const char *pszNewStatement);
|
||||
bool UpdateStatement();
|
||||
std::string GetCurrentStatement() const;
|
||||
|
||||
CPL_DISALLOW_COPY_ASSIGN(OGRADBCLayer)
|
||||
|
||||
public:
|
||||
OGRADBCLayer(OGRADBCDataset *poDS, const char *pszName,
|
||||
const char *pszStatement,
|
||||
std::unique_ptr<AdbcStatement> poStatement,
|
||||
std::unique_ptr<OGRArrowArrayStream> poStream,
|
||||
ArrowSchema *schema);
|
||||
ArrowSchema *schema, bool bInternalUse);
|
||||
~OGRADBCLayer() override;
|
||||
|
||||
OGRFeatureDefn *GetLayerDefn() override
|
||||
|
@ -128,6 +153,20 @@ class OGRADBCLayer final : public OGRLayer,
|
|||
bool GetArrowStream(struct ArrowArrayStream *out_stream,
|
||||
CSLConstList papszOptions = nullptr) override;
|
||||
GIntBig GetFeatureCount(int bForce) override;
|
||||
|
||||
//! Install a spatial filter on the first geometry field, by forwarding to
//! the overload that takes a geometry field index.
void SetSpatialFilter(OGRGeometry *poGeom) override
{
    constexpr int iFirstGeomField = 0;
    SetSpatialFilter(iFirstGeomField, poGeom);
}
|
||||
|
||||
OGRErr SetAttributeFilter(const char *pszFilter) override;
|
||||
void SetSpatialFilter(int iGeomField, OGRGeometry *poGeom) override;
|
||||
|
||||
OGRErr GetExtent(OGREnvelope *psExtent, int bForce = TRUE) override;
|
||||
OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent,
|
||||
int bForce = TRUE) override;
|
||||
OGRErr GetExtent3D(int iGeomField, OGREnvelope3D *psExtent,
|
||||
int bForce = TRUE) override;
|
||||
};
|
||||
|
||||
/************************************************************************/
|
||||
|
@ -143,6 +182,8 @@ class OGRADBCDataset final : public GDALDataset
|
|||
std::unique_ptr<AdbcConnection> m_connection{};
|
||||
std::vector<std::unique_ptr<OGRLayer>> m_apoLayers{};
|
||||
std::string m_osParquetFilename{};
|
||||
bool m_bIsDuckDB = false;
|
||||
bool m_bSpatialLoaded = false;
|
||||
|
||||
public:
|
||||
OGRADBCDataset() = default;
|
||||
|
@ -164,7 +205,13 @@ class OGRADBCDataset final : public GDALDataset
|
|||
OGRLayer *GetLayerByName(const char *pszName) override;
|
||||
|
||||
std::unique_ptr<OGRADBCLayer> CreateLayer(const char *pszStatement,
|
||||
const char *pszLayerName);
|
||||
const char *pszLayerName,
|
||||
bool bInternalUse);
|
||||
|
||||
std::unique_ptr<OGRADBCLayer> CreateInternalLayer(const char *pszStatement)
|
||||
{
|
||||
return CreateLayer(pszStatement, "temp", true);
|
||||
}
|
||||
|
||||
OGRLayer *ExecuteSQL(const char *pszStatement, OGRGeometry *poSpatialFilter,
|
||||
const char *pszDialect) override;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "ogradbcdrivercore.h"
|
||||
#include "ogr_mem.h"
|
||||
#include "ogr_p.h"
|
||||
#include "cpl_error.h"
|
||||
#include "cpl_json.h"
|
||||
#include "gdal_adbc.h"
|
||||
|
||||
|
@ -79,7 +80,8 @@ OGRADBCDataset::~OGRADBCDataset()
|
|||
/************************************************************************/
|
||||
|
||||
std::unique_ptr<OGRADBCLayer>
|
||||
OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName)
|
||||
OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName,
|
||||
bool bInternalUse)
|
||||
{
|
||||
|
||||
OGRADBCError error;
|
||||
|
@ -164,7 +166,8 @@ OGRADBCDataset::CreateLayer(const char *pszStatement, const char *pszLayerName)
|
|||
}
|
||||
|
||||
return std::make_unique<OGRADBCLayer>(
|
||||
this, pszLayerName, std::move(statement), std::move(stream), &schema);
|
||||
this, pszLayerName, osStatement.c_str(), std::move(statement),
|
||||
std::move(stream), &schema, bInternalUse);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
|
@ -181,7 +184,7 @@ OGRLayer *OGRADBCDataset::ExecuteSQL(const char *pszStatement,
|
|||
pszDialect);
|
||||
}
|
||||
|
||||
auto poLayer = CreateLayer(pszStatement, "RESULTSET");
|
||||
auto poLayer = CreateLayer(pszStatement, "RESULTSET", false);
|
||||
if (poLayer && poSpatialFilter)
|
||||
{
|
||||
if (poLayer->GetGeomType() == wkbNone)
|
||||
|
@ -211,7 +214,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
}
|
||||
const char *pszADBCDriverName =
|
||||
CSLFetchNameValue(poOpenInfo->papszOpenOptions, "ADBC_DRIVER");
|
||||
const bool bIsDuckDB = OGRADBCDriverIsDuckDB(poOpenInfo);
|
||||
m_bIsDuckDB = OGRADBCDriverIsDuckDB(poOpenInfo);
|
||||
const bool bIsSQLite3 =
|
||||
(pszADBCDriverName && EQUAL(pszADBCDriverName, "adbc_driver_sqlite")) ||
|
||||
OGRADBCDriverIsSQLite3(poOpenInfo);
|
||||
|
@ -221,7 +224,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
|
||||
if (!pszADBCDriverName)
|
||||
{
|
||||
if (bIsDuckDB || bIsParquet)
|
||||
if (m_bIsDuckDB || bIsParquet)
|
||||
{
|
||||
pszADBCDriverName =
|
||||
#ifdef _WIN32
|
||||
|
@ -249,7 +252,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
|
||||
// Load the driver
|
||||
if (pszADBCDriverName &&
|
||||
(bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
|
||||
(m_bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
|
||||
{
|
||||
if (OGRADBCLoadDriver(pszADBCDriverName, "duckdb_adbc_init", &m_driver,
|
||||
error) != ADBC_STATUS_OK)
|
||||
|
@ -280,7 +283,7 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
|
||||
// Set options
|
||||
if (pszADBCDriverName &&
|
||||
(bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
|
||||
(m_bIsDuckDB || bIsParquet || strstr(pszADBCDriverName, "duckdb")))
|
||||
{
|
||||
if (ADBC_CALL(DatabaseSetOption, &m_database, "path",
|
||||
bIsParquet ? ":memory:" : pszFilename,
|
||||
|
@ -346,9 +349,30 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
for (const char *pszStatement :
|
||||
cpl::Iterate(CSLConstList(papszPreludeStatements)))
|
||||
{
|
||||
CreateLayer(pszStatement, "temp");
|
||||
CreateInternalLayer(pszStatement);
|
||||
}
|
||||
CSLDestroy(papszPreludeStatements);
|
||||
if ((bIsParquet || m_bIsDuckDB) &&
|
||||
CPLTestBool(
|
||||
CPLGetConfigOption("OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", "ON")))
|
||||
{
|
||||
auto poTmpLayer =
|
||||
CreateInternalLayer("SELECT 1 FROM duckdb_extensions() WHERE "
|
||||
"extension_name='spatial' AND loaded = false");
|
||||
if (poTmpLayer && std::unique_ptr<OGRFeature>(
|
||||
poTmpLayer->GetNextFeature()) != nullptr)
|
||||
{
|
||||
CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
|
||||
CreateInternalLayer("LOAD spatial");
|
||||
}
|
||||
|
||||
poTmpLayer =
|
||||
CreateInternalLayer("SELECT 1 FROM duckdb_extensions() WHERE "
|
||||
"extension_name='spatial' AND loaded = true");
|
||||
m_bSpatialLoaded =
|
||||
poTmpLayer && std::unique_ptr<OGRFeature>(
|
||||
poTmpLayer->GetNextFeature()) != nullptr;
|
||||
}
|
||||
|
||||
std::string osLayerName = "RESULTSET";
|
||||
std::string osSQL;
|
||||
|
@ -374,18 +398,89 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
{
|
||||
if (pszSQL[0])
|
||||
{
|
||||
auto poLayer = CreateLayer(pszSQL, osLayerName.c_str());
|
||||
std::unique_ptr<OGRADBCLayer> poLayer;
|
||||
if ((bIsParquet || m_bIsDuckDB) && m_bSpatialLoaded)
|
||||
{
|
||||
std::string osErrorMsg;
|
||||
{
|
||||
CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
|
||||
poLayer = CreateLayer(pszSQL, osLayerName.c_str(), false);
|
||||
if (!poLayer)
|
||||
osErrorMsg = CPLGetLastErrorMsg();
|
||||
}
|
||||
if (!poLayer)
|
||||
{
|
||||
CPLDebug("ADBC",
|
||||
"Connecting with 'LOAD spatial' did not work "
|
||||
"(%s). Retrying without it",
|
||||
osErrorMsg.c_str());
|
||||
ADBC_CALL(ConnectionRelease, m_connection.get(), error);
|
||||
m_connection.reset();
|
||||
|
||||
ADBC_CALL(DatabaseRelease, &m_database, error);
|
||||
memset(&m_database, 0, sizeof(m_database));
|
||||
|
||||
if (ADBC_CALL(DatabaseNew, &m_database, error) !=
|
||||
ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined,
|
||||
"AdbcDatabaseNew() failed: %s",
|
||||
error.message());
|
||||
return false;
|
||||
}
|
||||
if (ADBC_CALL(DatabaseSetOption, &m_database, "path",
|
||||
":memory:", error) != ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined,
|
||||
"AdbcDatabaseSetOption() failed: %s",
|
||||
error.message());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ADBC_CALL(DatabaseInit, &m_database, error) !=
|
||||
ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined,
|
||||
"AdbcDatabaseInit() failed: %s",
|
||||
error.message());
|
||||
return false;
|
||||
}
|
||||
|
||||
m_connection = std::make_unique<AdbcConnection>();
|
||||
if (ADBC_CALL(ConnectionNew, m_connection.get(), error) !=
|
||||
ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined,
|
||||
"AdbcConnectionNew() failed: %s",
|
||||
error.message());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ADBC_CALL(ConnectionInit, m_connection.get(),
|
||||
&m_database, error) != ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined,
|
||||
"AdbcConnectionInit() failed: %s",
|
||||
error.message());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!poLayer)
|
||||
return false;
|
||||
{
|
||||
poLayer = CreateLayer(pszSQL, osLayerName.c_str(), false);
|
||||
if (!poLayer)
|
||||
return false;
|
||||
}
|
||||
|
||||
poLayer->m_bIsParquetLayer = bIsParquetLayer;
|
||||
m_apoLayers.emplace_back(std::move(poLayer));
|
||||
}
|
||||
}
|
||||
else if (bIsDuckDB || bIsSQLite3)
|
||||
else if (m_bIsDuckDB || bIsSQLite3)
|
||||
{
|
||||
auto poLayerList = CreateLayer(
|
||||
"SELECT name FROM sqlite_master WHERE type IN ('table', 'view')",
|
||||
"LAYERLIST");
|
||||
auto poLayerList = CreateInternalLayer(
|
||||
"SELECT name FROM sqlite_master WHERE type IN ('table', 'view')");
|
||||
if (!poLayerList || poLayerList->GetLayerDefn()->GetFieldCount() != 1)
|
||||
{
|
||||
return false;
|
||||
|
@ -400,7 +495,8 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
CPLSPrintf("SELECT * FROM \"%s\"",
|
||||
OGRDuplicateCharacter(pszLayerName, '"').c_str());
|
||||
CPLTurnFailureIntoWarning(true);
|
||||
auto poLayer = CreateLayer(osStatement.c_str(), pszLayerName);
|
||||
auto poLayer =
|
||||
CreateLayer(osStatement.c_str(), pszLayerName, false);
|
||||
CPLTurnFailureIntoWarning(false);
|
||||
if (poLayer)
|
||||
{
|
||||
|
@ -410,13 +506,12 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
}
|
||||
else if (bIsPostgreSQL)
|
||||
{
|
||||
auto poLayerList = CreateLayer(
|
||||
auto poLayerList = CreateInternalLayer(
|
||||
"SELECT n.nspname, c.relname FROM pg_class c "
|
||||
"JOIN pg_namespace n ON c.relnamespace = n.oid "
|
||||
"AND c.relkind in ('r','v','m','f') "
|
||||
"AND n.nspname NOT IN ('pg_catalog', 'information_schema') "
|
||||
"ORDER BY c.oid",
|
||||
"LAYERLIST");
|
||||
"ORDER BY c.oid");
|
||||
if (!poLayerList || poLayerList->GetLayerDefn()->GetFieldCount() != 2)
|
||||
{
|
||||
return false;
|
||||
|
@ -432,9 +527,9 @@ bool OGRADBCDataset::Open(const GDALOpenInfo *poOpenInfo)
|
|||
OGRDuplicateCharacter(pszTableName, '"').c_str());
|
||||
|
||||
CPLTurnFailureIntoWarning(true);
|
||||
auto poLayer =
|
||||
CreateLayer(osStatement.c_str(),
|
||||
CPLSPrintf("%s.%s", pszNamespace, pszTableName));
|
||||
auto poLayer = CreateLayer(
|
||||
osStatement.c_str(),
|
||||
CPLSPrintf("%s.%s", pszNamespace, pszTableName), false);
|
||||
CPLTurnFailureIntoWarning(false);
|
||||
if (poLayer)
|
||||
{
|
||||
|
@ -474,8 +569,9 @@ OGRLayer *OGRADBCDataset::GetLayerByName(const char *pszName)
|
|||
}
|
||||
|
||||
auto statement = std::make_unique<AdbcStatement>();
|
||||
OGRADBCLayer tmpLayer(this, "", std::move(statement),
|
||||
std::move(objectsStream), &schema);
|
||||
OGRADBCLayer tmpLayer(this, "", "", std::move(statement),
|
||||
std::move(objectsStream), &schema,
|
||||
/* bInternalUse = */ true);
|
||||
const auto tmpLayerDefn = tmpLayer.GetLayerDefn();
|
||||
if (tmpLayerDefn->GetFieldIndex("catalog_name") < 0 ||
|
||||
tmpLayerDefn->GetFieldIndex("catalog_db_schemas") < 0)
|
||||
|
|
|
@ -12,31 +12,434 @@
|
|||
****************************************************************************/
|
||||
|
||||
#include "ogr_adbc.h"
|
||||
#include "ogr_spatialref.h"
|
||||
#include "ogr_p.h"
|
||||
#include "cpl_json.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#define ADBC_CALL(func, ...) m_poDS->m_driver.func(__VA_ARGS__)
|
||||
|
||||
/************************************************************************/
|
||||
/* GetGeometryTypeFromString() */
|
||||
/************************************************************************/
|
||||
|
||||
/** Convert a geometry type name into an OGR geometry type.
 *
 * The name is handed to OGRReadWKTGeometryType(). When the outcome is still
 * wkbUnknown although the name is not empty, a debug message is emitted.
 *
 * @param osType geometry type name.
 * @return the decoded geometry type, or wkbUnknown.
 */
static OGRwkbGeometryType GetGeometryTypeFromString(const std::string &osType)
{
    OGRwkbGeometryType eParsedType = wkbUnknown;
    OGRReadWKTGeometryType(osType.c_str(), &eParsedType);

    const bool bNotRecognized = (eParsedType == wkbUnknown);
    if (bNotRecognized && !osType.empty())
        CPLDebug("ADBC", "Unknown geometry type: %s", osType.c_str());

    return eParsedType;
}
|
||||
|
||||
/************************************************************************/
|
||||
/* OGRADBCLayer() */
|
||||
/************************************************************************/
|
||||
|
||||
OGRADBCLayer::OGRADBCLayer(OGRADBCDataset *poDS, const char *pszName,
|
||||
const char *pszStatement,
|
||||
std::unique_ptr<AdbcStatement> poStatement,
|
||||
std::unique_ptr<OGRArrowArrayStream> poStream,
|
||||
ArrowSchema *schema)
|
||||
: m_poDS(poDS), m_statement(std::move(poStatement)),
|
||||
m_stream(std::move(poStream))
|
||||
ArrowSchema *schema, bool bInternalUse)
|
||||
: m_poDS(poDS), m_osBaseStatement(pszStatement),
|
||||
m_osModifiedBaseStatement(m_osBaseStatement),
|
||||
m_statement(std::move(poStatement)), m_stream(std::move(poStream))
|
||||
{
|
||||
SetDescription(pszName);
|
||||
|
||||
memcpy(&m_schema, schema, sizeof(m_schema));
|
||||
schema->release = nullptr;
|
||||
|
||||
m_poAdapterLayer =
|
||||
std::make_unique<OGRArrowArrayToOGRFeatureAdapterLayer>(pszName);
|
||||
BuildLayerDefn(bInternalUse);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* ParseGeometryColumnCovering() */
|
||||
/************************************************************************/
|
||||
|
||||
//! Parse bounding box column definition
|
||||
static bool ParseGeometryColumnCovering(const CPLJSONObject &oJSONDef,
|
||||
std::string &osBBOXColumn,
|
||||
std::string &osXMin,
|
||||
std::string &osYMin,
|
||||
std::string &osXMax,
|
||||
std::string &osYMax)
|
||||
{
|
||||
const auto oCovering = oJSONDef["covering"];
|
||||
if (oCovering.IsValid() &&
|
||||
oCovering.GetType() == CPLJSONObject::Type::Object)
|
||||
{
|
||||
const auto oBBOX = oCovering["bbox"];
|
||||
if (oBBOX.IsValid() && oBBOX.GetType() == CPLJSONObject::Type::Object)
|
||||
{
|
||||
const auto oXMin = oBBOX["xmin"];
|
||||
const auto oYMin = oBBOX["ymin"];
|
||||
const auto oXMax = oBBOX["xmax"];
|
||||
const auto oYMax = oBBOX["ymax"];
|
||||
if (oXMin.IsValid() && oYMin.IsValid() && oXMax.IsValid() &&
|
||||
oYMax.IsValid() &&
|
||||
oXMin.GetType() == CPLJSONObject::Type::Array &&
|
||||
oYMin.GetType() == CPLJSONObject::Type::Array &&
|
||||
oXMax.GetType() == CPLJSONObject::Type::Array &&
|
||||
oYMax.GetType() == CPLJSONObject::Type::Array)
|
||||
{
|
||||
const auto osXMinArray = oXMin.ToArray();
|
||||
const auto osYMinArray = oYMin.ToArray();
|
||||
const auto osXMaxArray = oXMax.ToArray();
|
||||
const auto osYMaxArray = oYMax.ToArray();
|
||||
if (osXMinArray.Size() == 2 && osYMinArray.Size() == 2 &&
|
||||
osXMaxArray.Size() == 2 && osYMaxArray.Size() == 2 &&
|
||||
osXMinArray[0].GetType() == CPLJSONObject::Type::String &&
|
||||
osXMinArray[1].GetType() == CPLJSONObject::Type::String &&
|
||||
osYMinArray[0].GetType() == CPLJSONObject::Type::String &&
|
||||
osYMinArray[1].GetType() == CPLJSONObject::Type::String &&
|
||||
osXMaxArray[0].GetType() == CPLJSONObject::Type::String &&
|
||||
osXMaxArray[1].GetType() == CPLJSONObject::Type::String &&
|
||||
osYMaxArray[0].GetType() == CPLJSONObject::Type::String &&
|
||||
osYMaxArray[1].GetType() == CPLJSONObject::Type::String &&
|
||||
osXMinArray[0].ToString() == osYMinArray[0].ToString() &&
|
||||
osXMinArray[0].ToString() == osXMaxArray[0].ToString() &&
|
||||
osXMinArray[0].ToString() == osYMaxArray[0].ToString())
|
||||
{
|
||||
osBBOXColumn = osXMinArray[0].ToString();
|
||||
osXMin = osXMinArray[1].ToString();
|
||||
osYMin = osYMinArray[1].ToString();
|
||||
osXMax = osXMaxArray[1].ToString();
|
||||
osYMax = osYMaxArray[1].ToString();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* ParseGeoParquetColumn() */
|
||||
/************************************************************************/
|
||||
|
||||
static void ParseGeoParquetColumn(
|
||||
const CPLJSONObject &oColumn,
|
||||
std::map<std::string, OGRwkbGeometryType> &oMapType,
|
||||
std::map<std::string, OGREnvelope3D> &oMapExtent,
|
||||
std::map<std::string, OGRADBCLayer::GeomColBBOX>
|
||||
&oMapGeomColumnToCoveringBBOXColumn,
|
||||
std::map<std::string, std::unique_ptr<OGRSpatialReference>>
|
||||
&oMapGeomColumnsFromGeoParquet,
|
||||
std::set<std::string> &oSetCoveringBBoxColumn)
|
||||
{
|
||||
auto oCrs = oColumn.GetObj("crs");
|
||||
if (!oCrs.IsValid())
|
||||
{
|
||||
// WGS 84 is implied if no crs member is found.
|
||||
auto poSRS = std::make_unique<OGRSpatialReference>();
|
||||
poSRS->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
|
||||
poSRS->importFromEPSG(4326);
|
||||
oMapGeomColumnsFromGeoParquet[oColumn.GetName()] = std::move(poSRS);
|
||||
}
|
||||
else if (oCrs.GetType() == CPLJSONObject::Type::Object)
|
||||
{
|
||||
// CRS encoded as PROJJSON (extension)
|
||||
const auto oType = oCrs["type"];
|
||||
if (oType.IsValid() && oType.GetType() == CPLJSONObject::Type::String)
|
||||
{
|
||||
const auto osType = oType.ToString();
|
||||
if (osType.find("CRS") != std::string::npos)
|
||||
{
|
||||
auto poSRS = std::make_unique<OGRSpatialReference>();
|
||||
poSRS->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
|
||||
|
||||
if (poSRS->SetFromUserInput(oCrs.ToString().c_str()) ==
|
||||
OGRERR_NONE)
|
||||
{
|
||||
oMapGeomColumnsFromGeoParquet[oColumn.GetName()] =
|
||||
std::move(poSRS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
oMapGeomColumnsFromGeoParquet[oColumn.GetName()] = nullptr;
|
||||
}
|
||||
|
||||
OGRwkbGeometryType eGeomType = wkbUnknown;
|
||||
auto oType = oColumn.GetObj("geometry_types");
|
||||
if (oType.GetType() == CPLJSONObject::Type::Array)
|
||||
{
|
||||
const auto oTypeArray = oType.ToArray();
|
||||
if (oTypeArray.Size() == 1)
|
||||
{
|
||||
eGeomType = GetGeometryTypeFromString(oTypeArray[0].ToString());
|
||||
}
|
||||
else if (oTypeArray.Size() > 1)
|
||||
{
|
||||
const auto PromoteToCollection = [](OGRwkbGeometryType eType)
|
||||
{
|
||||
if (eType == wkbPoint)
|
||||
return wkbMultiPoint;
|
||||
if (eType == wkbLineString)
|
||||
return wkbMultiLineString;
|
||||
if (eType == wkbPolygon)
|
||||
return wkbMultiPolygon;
|
||||
return eType;
|
||||
};
|
||||
bool bMixed = false;
|
||||
bool bHasMulti = false;
|
||||
bool bHasZ = false;
|
||||
bool bHasM = false;
|
||||
const auto eFirstType = OGR_GT_Flatten(
|
||||
GetGeometryTypeFromString(oTypeArray[0].ToString()));
|
||||
const auto eFirstTypeCollection = PromoteToCollection(eFirstType);
|
||||
for (int i = 0; i < oTypeArray.Size(); ++i)
|
||||
{
|
||||
const auto eThisGeom =
|
||||
GetGeometryTypeFromString(oTypeArray[i].ToString());
|
||||
if (PromoteToCollection(OGR_GT_Flatten(eThisGeom)) !=
|
||||
eFirstTypeCollection)
|
||||
{
|
||||
bMixed = true;
|
||||
break;
|
||||
}
|
||||
bHasZ |= OGR_GT_HasZ(eThisGeom) != FALSE;
|
||||
bHasM |= OGR_GT_HasM(eThisGeom) != FALSE;
|
||||
bHasMulti |= (PromoteToCollection(OGR_GT_Flatten(eThisGeom)) ==
|
||||
OGR_GT_Flatten(eThisGeom));
|
||||
}
|
||||
if (!bMixed)
|
||||
{
|
||||
if (eFirstTypeCollection == wkbMultiPolygon ||
|
||||
eFirstTypeCollection == wkbMultiLineString)
|
||||
{
|
||||
if (bHasMulti)
|
||||
eGeomType = OGR_GT_SetModifier(eFirstTypeCollection,
|
||||
bHasZ, bHasM);
|
||||
else
|
||||
eGeomType =
|
||||
OGR_GT_SetModifier(eFirstType, bHasZ, bHasM);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
oMapType[oColumn.GetName()] = eGeomType;
|
||||
|
||||
OGREnvelope3D sExtent;
|
||||
const auto oBBox = oColumn.GetArray("bbox");
|
||||
if (oBBox.IsValid() && oBBox.Size() == 4)
|
||||
{
|
||||
sExtent.MinX = oBBox[0].ToDouble();
|
||||
sExtent.MinY = oBBox[1].ToDouble();
|
||||
sExtent.MinZ = std::numeric_limits<double>::infinity();
|
||||
sExtent.MaxX = oBBox[2].ToDouble();
|
||||
sExtent.MaxY = oBBox[3].ToDouble();
|
||||
sExtent.MaxZ = -std::numeric_limits<double>::infinity();
|
||||
if (sExtent.MinX <= sExtent.MaxX)
|
||||
{
|
||||
oMapExtent[oColumn.GetName()] = sExtent;
|
||||
}
|
||||
}
|
||||
else if (oBBox.IsValid() && oBBox.Size() == 6)
|
||||
{
|
||||
sExtent.MinX = oBBox[0].ToDouble();
|
||||
sExtent.MinY = oBBox[1].ToDouble();
|
||||
sExtent.MinZ = oBBox[2].ToDouble();
|
||||
sExtent.MaxX = oBBox[3].ToDouble();
|
||||
sExtent.MaxY = oBBox[4].ToDouble();
|
||||
sExtent.MaxZ = oBBox[5].ToDouble();
|
||||
if (sExtent.MinX <= sExtent.MaxX)
|
||||
{
|
||||
oMapExtent[oColumn.GetName()] = sExtent;
|
||||
}
|
||||
}
|
||||
|
||||
std::string osBBOXColumn;
|
||||
std::string osXMin, osYMin, osXMax, osYMax;
|
||||
if (ParseGeometryColumnCovering(oColumn, osBBOXColumn, osXMin, osYMin,
|
||||
osXMax, osYMax))
|
||||
{
|
||||
OGRADBCLayer::GeomColBBOX geomColBBOX;
|
||||
const std::string osPrefix =
|
||||
std::string("\"")
|
||||
.append(OGRDuplicateCharacter(osBBOXColumn, '"'))
|
||||
.append("\".\"");
|
||||
geomColBBOX.osXMin = std::string(osPrefix)
|
||||
.append(OGRDuplicateCharacter(osXMin, '"'))
|
||||
.append("\"");
|
||||
geomColBBOX.osYMin = std::string(osPrefix)
|
||||
.append(OGRDuplicateCharacter(osYMin, '"'))
|
||||
.append("\"");
|
||||
geomColBBOX.osXMax = std::string(osPrefix)
|
||||
.append(OGRDuplicateCharacter(osXMax, '"'))
|
||||
.append("\"");
|
||||
geomColBBOX.osYMax = std::string(osPrefix)
|
||||
.append(OGRDuplicateCharacter(osYMax, '"'))
|
||||
.append("\"");
|
||||
oMapGeomColumnToCoveringBBOXColumn[oColumn.GetName()] = geomColBBOX;
|
||||
oSetCoveringBBoxColumn.insert(osBBOXColumn);
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* BuildLayerDefn() */
|
||||
/************************************************************************/
|
||||
|
||||
void OGRADBCLayer::BuildLayerDefn(bool bInternalUse)
|
||||
{
|
||||
// Identify geometry columns for Parquet files, and query them with
|
||||
// ST_AsWKB() to avoid getting duckdb_spatial own geometry encoding
|
||||
// (https://github.com/duckdb/duckdb_spatial/blob/a60aa3733741a99c49baaf33390c0f7c8a9598a3/spatial/src/spatial/core/geometry/geometry_serialization.cpp#L11)
|
||||
std::map<std::string, std::unique_ptr<OGRSpatialReference>> oMapGeomColumns;
|
||||
std::map<std::string, OGRwkbGeometryType> oMapType;
|
||||
std::map<std::string, OGREnvelope3D> oMapExtent;
|
||||
std::map<std::string, GeomColBBOX> oMapGeomColumnToCoveringBBOXColumn;
|
||||
if (!bInternalUse && STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT ") &&
|
||||
(m_poDS->m_bIsDuckDB ||
|
||||
(!m_poDS->m_osParquetFilename.empty() &&
|
||||
CPLString(m_osBaseStatement)
|
||||
.ifind(std::string(" FROM '").append(OGRDuplicateCharacter(
|
||||
m_poDS->m_osParquetFilename, '\''))) !=
|
||||
std::string::npos)))
|
||||
{
|
||||
// Try to read GeoParquet 'geo' metadata
|
||||
std::map<std::string, std::unique_ptr<OGRSpatialReference>>
|
||||
oMapGeomColumnsFromGeoParquet;
|
||||
std::set<std::string> oSetCoveringBBoxColumn;
|
||||
|
||||
std::string osGeoParquetMD;
|
||||
if (!m_poDS->m_osParquetFilename.empty())
|
||||
{
|
||||
auto poMetadataLayer = m_poDS->CreateInternalLayer(
|
||||
std::string("SELECT value FROM parquet_kv_metadata('")
|
||||
.append(OGRDuplicateCharacter(m_poDS->m_osParquetFilename,
|
||||
'\''))
|
||||
.append("') WHERE key = 'geo'")
|
||||
.c_str());
|
||||
if (poMetadataLayer)
|
||||
{
|
||||
auto f = std::unique_ptr<OGRFeature>(
|
||||
poMetadataLayer->GetNextFeature());
|
||||
if (f)
|
||||
{
|
||||
int nBytes = 0;
|
||||
const GByte *pabyData = f->GetFieldAsBinary(0, &nBytes);
|
||||
osGeoParquetMD.assign(
|
||||
reinterpret_cast<const char *>(pabyData), nBytes);
|
||||
// CPLDebug("ADBC", "%s", osGeoParquetMD.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
CPLJSONDocument oDoc;
|
||||
if (!osGeoParquetMD.empty() && oDoc.LoadMemory(osGeoParquetMD))
|
||||
{
|
||||
const auto oColums = oDoc.GetRoot().GetObj("columns");
|
||||
for (const auto &oColumn : oColums.GetChildren())
|
||||
{
|
||||
if (oColumn.GetString("encoding") == "WKB")
|
||||
{
|
||||
ParseGeoParquetColumn(oColumn, oMapType, oMapExtent,
|
||||
oMapGeomColumnToCoveringBBOXColumn,
|
||||
oMapGeomColumnsFromGeoParquet,
|
||||
oSetCoveringBBoxColumn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto poDescribeLayer = m_poDS->CreateInternalLayer(
|
||||
std::string("DESCRIBE ").append(m_osBaseStatement).c_str());
|
||||
std::string osNewStatement;
|
||||
bool bNewStatement = false;
|
||||
if (poDescribeLayer &&
|
||||
// cppcheck-suppress knownConditionTrueFalse
|
||||
(m_poDS->m_bIsDuckDB || !oMapGeomColumnsFromGeoParquet.empty()))
|
||||
{
|
||||
for (auto &&f : *poDescribeLayer)
|
||||
{
|
||||
const char *pszColName = f->GetFieldAsString("column_name");
|
||||
if (cpl::contains(oSetCoveringBBoxColumn, pszColName))
|
||||
{
|
||||
bNewStatement = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// f->DumpReadable(stdout);
|
||||
if (osNewStatement.empty())
|
||||
osNewStatement = "SELECT ";
|
||||
else
|
||||
osNewStatement += ", ";
|
||||
|
||||
auto oIter = oMapGeomColumnsFromGeoParquet.find(pszColName);
|
||||
if (oIter != oMapGeomColumnsFromGeoParquet.end())
|
||||
{
|
||||
oMapGeomColumns[pszColName] = std::move(oIter->second);
|
||||
}
|
||||
if (EQUAL(f->GetFieldAsString("column_type"), "GEOMETRY") &&
|
||||
m_poDS->m_bSpatialLoaded)
|
||||
{
|
||||
bNewStatement = true;
|
||||
osNewStatement += "ST_AsWKB(\"";
|
||||
osNewStatement += OGRDuplicateCharacter(pszColName, '"');
|
||||
osNewStatement += "\") AS ";
|
||||
if (oIter == oMapGeomColumnsFromGeoParquet.end())
|
||||
oMapGeomColumns[pszColName] = nullptr;
|
||||
}
|
||||
osNewStatement += '"';
|
||||
osNewStatement += OGRDuplicateCharacter(pszColName, '"');
|
||||
osNewStatement += '"';
|
||||
}
|
||||
m_osModifiedSelect = osNewStatement;
|
||||
osNewStatement += " FROM (";
|
||||
osNewStatement += m_osBaseStatement;
|
||||
osNewStatement += " )";
|
||||
}
|
||||
|
||||
if (bNewStatement)
|
||||
{
|
||||
// CPLDebug("ADBC", "%s -> %s", m_osBaseStatement.c_str(), osNewStatement.c_str());
|
||||
if (ReplaceStatement(osNewStatement.c_str()))
|
||||
{
|
||||
m_osModifiedBaseStatement = osNewStatement;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_osModifiedSelect.clear();
|
||||
oMapGeomColumns.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_poAdapterLayer = std::make_unique<OGRArrowArrayToOGRFeatureAdapterLayer>(
|
||||
GetDescription());
|
||||
|
||||
for (int i = 0; i < m_schema.n_children; ++i)
|
||||
{
|
||||
m_poAdapterLayer->CreateFieldFromArrowSchema(m_schema.children[i]);
|
||||
const char *pszColName = m_schema.children[i]->name;
|
||||
auto oIter = oMapGeomColumns.find(pszColName);
|
||||
if (oIter != oMapGeomColumns.end())
|
||||
{
|
||||
OGRGeomFieldDefn oGeomFieldDefn(pszColName, oMapType[pszColName]);
|
||||
auto poSRS = std::move(oIter->second).release();
|
||||
if (poSRS)
|
||||
{
|
||||
oGeomFieldDefn.SetSpatialRef(poSRS);
|
||||
poSRS->Release();
|
||||
}
|
||||
m_poAdapterLayer->m_poLayerDefn->AddGeomFieldDefn(&oGeomFieldDefn);
|
||||
|
||||
m_extents.push_back(oMapExtent[pszColName]);
|
||||
m_geomColBBOX.push_back(
|
||||
oMapGeomColumnToCoveringBBOXColumn[pszColName]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_poAdapterLayer->CreateFieldFromArrowSchema(m_schema.children[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,6 +456,67 @@ OGRADBCLayer::~OGRADBCLayer()
|
|||
m_schema.release(&m_schema);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* ReplaceStatement() */
|
||||
/************************************************************************/
|
||||
|
||||
bool OGRADBCLayer::ReplaceStatement(const char *pszNewStatement)
|
||||
{
|
||||
// CPLDebug("ADBC", "%s", pszNewStatement);
|
||||
OGRADBCError error;
|
||||
auto statement = std::make_unique<AdbcStatement>();
|
||||
if (ADBC_CALL(StatementNew, m_poDS->m_connection.get(), statement.get(),
|
||||
error) != ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined, "AdbcStatementNew() failed: %s",
|
||||
error.message());
|
||||
ADBC_CALL(StatementRelease, statement.get(), error);
|
||||
}
|
||||
else if (ADBC_CALL(StatementSetSqlQuery, statement.get(), pszNewStatement,
|
||||
error) != ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined,
|
||||
"AdbcStatementSetSqlQuery() failed: %s", error.message());
|
||||
error.clear();
|
||||
ADBC_CALL(StatementRelease, statement.get(), error);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto stream = std::make_unique<OGRArrowArrayStream>();
|
||||
int64_t rows_affected = -1;
|
||||
ArrowSchema newSchema;
|
||||
memset(&newSchema, 0, sizeof(newSchema));
|
||||
if (ADBC_CALL(StatementExecuteQuery, statement.get(), stream->get(),
|
||||
&rows_affected, error) != ADBC_STATUS_OK)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined,
|
||||
"AdbcStatementExecuteQuery() failed: %s", error.message());
|
||||
error.clear();
|
||||
ADBC_CALL(StatementRelease, statement.get(), error);
|
||||
}
|
||||
else if (stream->get_schema(&newSchema) != 0)
|
||||
{
|
||||
CPLError(CE_Failure, CPLE_AppDefined, "get_schema() failed");
|
||||
ADBC_CALL(StatementRelease, statement.get(), error);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_schema.release)
|
||||
m_schema.release(&m_schema);
|
||||
memcpy(&m_schema, &newSchema, sizeof(newSchema));
|
||||
|
||||
if (m_statement)
|
||||
ADBC_CALL(StatementRelease, m_statement.get(), error);
|
||||
m_statement = std::move(statement);
|
||||
|
||||
m_stream = std::move(stream);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* GetNextRawFeature() */
|
||||
/************************************************************************/
|
||||
|
@ -99,6 +563,16 @@ OGRFeature *OGRADBCLayer::GetNextRawFeature()
|
|||
}
|
||||
|
||||
auto poFeature = m_poAdapterLayer->m_apoFeatures[m_nIdx++].release();
|
||||
const int nGeomFieldCount =
|
||||
m_poAdapterLayer->m_poLayerDefn->GetFieldCount();
|
||||
for (int i = 0; i < nGeomFieldCount; ++i)
|
||||
{
|
||||
auto poGeom = poFeature->GetGeomFieldRef(i);
|
||||
if (poGeom)
|
||||
poGeom->assignSpatialReference(
|
||||
m_poAdapterLayer->m_poLayerDefn->GetGeomFieldDefn(i)
|
||||
->GetSpatialRef());
|
||||
}
|
||||
poFeature->SetFID(m_nFeatureID++);
|
||||
return poFeature;
|
||||
}
|
||||
|
@ -119,6 +593,194 @@ void OGRADBCLayer::ResetReading()
|
|||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* GetExtent() */
|
||||
/************************************************************************/
|
||||
|
||||
// Extent of the first geometry field: forwards to the overload taking a
// geometry field index, with index 0.
OGRErr OGRADBCLayer::GetExtent(OGREnvelope *psExtent, int bForce)
{
    constexpr int iFirstGeomField = 0;
    return GetExtent(iFirstGeomField, psExtent, bForce);
}
|
||||
|
||||
/************************************************************************/
|
||||
/* GetExtent() */
|
||||
/************************************************************************/
|
||||
|
||||
// 2D extent of geometry field iGeomField.
//
// Returns OGRERR_FAILURE for an out-of-range index (an error is only emitted
// when the index is not 0). Otherwise the extent stored in m_extents is
// returned when it is initialized, and GetExtentInternal() is used as a
// fallback when it is not.
OGRErr OGRADBCLayer::GetExtent(int iGeomField, OGREnvelope *psExtent,
                               int bForce)
{
    const bool bValidIndex =
        iGeomField >= 0 && iGeomField < GetLayerDefn()->GetGeomFieldCount();
    if (!bValidIndex)
    {
        if (iGeomField != 0)
        {
            CPLError(CE_Failure, CPLE_AppDefined,
                     "Invalid geometry field index : %d", iGeomField);
        }
        return OGRERR_FAILURE;
    }

    *psExtent = m_extents[iGeomField];
    return psExtent->IsInit()
               ? OGRERR_NONE
               : GetExtentInternal(iGeomField, psExtent, bForce);
}
|
||||
|
||||
/************************************************************************/
|
||||
/* GetExtent3D() */
|
||||
/************************************************************************/
|
||||
|
||||
// 3D extent of geometry field iGeomField.
//
// Returns OGRERR_FAILURE for an out-of-range index (an error is only emitted
// when the index is not 0). Otherwise the extent stored in m_extents is
// returned when it is initialized. When it is not, the generic 3D
// implementation of the base class is used, so that the Z bounds are
// computed too (the 2D GetExtentInternal() helper only fills X and Y).
OGRErr OGRADBCLayer::GetExtent3D(int iGeomField, OGREnvelope3D *psExtent,
                                 int bForce)
{
    if (iGeomField < 0 || iGeomField >= GetLayerDefn()->GetGeomFieldCount())
    {
        if (iGeomField != 0)
        {
            CPLError(CE_Failure, CPLE_AppDefined,
                     "Invalid geometry field index : %d", iGeomField);
        }
        return OGRERR_FAILURE;
    }

    *psExtent = m_extents[iGeomField];
    if (psExtent->IsInit())
        return OGRERR_NONE;

    // NOTE(review): assumes OGRLayer::GetExtent3D(int, OGREnvelope3D *, int)
    // is the generic 3D implementation in this GDAL version - confirm.
    return OGRLayer::GetExtent3D(iGeomField, psExtent, bForce);
}
|
||||
|
||||
/************************************************************************/
|
||||
/* GetCurrentStatement() */
|
||||
/************************************************************************/
|
||||
|
||||
std::string OGRADBCLayer::GetCurrentStatement() const
|
||||
{
|
||||
if (!m_osModifiedSelect.empty() &&
|
||||
STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ") &&
|
||||
(!m_osAttributeFilter.empty() ||
|
||||
(m_poFilterGeom &&
|
||||
(!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty() ||
|
||||
m_poDS->m_bSpatialLoaded))))
|
||||
{
|
||||
std::string osStatement(m_osModifiedSelect);
|
||||
osStatement.append(" FROM (")
|
||||
.append(m_osBaseStatement)
|
||||
.append(") WHERE ");
|
||||
|
||||
bool bAddAnd = false;
|
||||
if (m_poFilterGeom)
|
||||
{
|
||||
const double dfMinX = std::isinf(m_sFilterEnvelope.MinX)
|
||||
? -std::numeric_limits<double>::max()
|
||||
: m_sFilterEnvelope.MinX;
|
||||
const double dfMinY = std::isinf(m_sFilterEnvelope.MinY)
|
||||
? -std::numeric_limits<double>::max()
|
||||
: m_sFilterEnvelope.MinY;
|
||||
const double dfMaxX = std::isinf(m_sFilterEnvelope.MaxX)
|
||||
? std::numeric_limits<double>::max()
|
||||
: m_sFilterEnvelope.MaxX;
|
||||
const double dfMaxY = std::isinf(m_sFilterEnvelope.MaxY)
|
||||
? std::numeric_limits<double>::max()
|
||||
: m_sFilterEnvelope.MaxY;
|
||||
if (!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty())
|
||||
{
|
||||
bAddAnd = true;
|
||||
osStatement.append(m_geomColBBOX[m_iGeomFieldFilter].osXMin)
|
||||
.append(" <= ")
|
||||
.append(CPLSPrintf("%.17g", dfMaxX))
|
||||
.append(" AND ")
|
||||
.append(m_geomColBBOX[m_iGeomFieldFilter].osXMax)
|
||||
.append(" >= ")
|
||||
.append(CPLSPrintf("%.17g", dfMinX))
|
||||
.append(" AND ")
|
||||
.append(m_geomColBBOX[m_iGeomFieldFilter].osYMin)
|
||||
.append(" <= ")
|
||||
.append(CPLSPrintf("%.17g", dfMaxY))
|
||||
.append(" AND ")
|
||||
.append(m_geomColBBOX[m_iGeomFieldFilter].osYMax)
|
||||
.append(" >= ")
|
||||
.append(CPLSPrintf("%.17g", dfMinY));
|
||||
}
|
||||
if (m_poDS->m_bSpatialLoaded)
|
||||
{
|
||||
if (bAddAnd)
|
||||
osStatement.append(" AND ");
|
||||
bAddAnd = true;
|
||||
osStatement.append("ST_Intersects(\"")
|
||||
.append(OGRDuplicateCharacter(
|
||||
m_poAdapterLayer->m_poLayerDefn
|
||||
->GetGeomFieldDefn(m_iGeomFieldFilter)
|
||||
->GetNameRef(),
|
||||
'"'))
|
||||
.append(CPLSPrintf(
|
||||
"\", ST_MakeEnvelope(%.17g,%.17g,%.17g,%.17g))", dfMinX,
|
||||
dfMinY, dfMaxX, dfMaxY));
|
||||
}
|
||||
}
|
||||
if (!m_osAttributeFilter.empty())
|
||||
{
|
||||
if (bAddAnd)
|
||||
osStatement.append(" AND ");
|
||||
osStatement.append("(");
|
||||
osStatement.append(m_osAttributeFilter);
|
||||
osStatement.append(")");
|
||||
}
|
||||
|
||||
return osStatement;
|
||||
}
|
||||
else
|
||||
{
|
||||
return m_osModifiedBaseStatement;
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* UpdateStatement() */
|
||||
/************************************************************************/
|
||||
|
||||
bool OGRADBCLayer::UpdateStatement()
|
||||
{
|
||||
return ReplaceStatement(GetCurrentStatement().c_str());
|
||||
}
|
||||
|
||||
/***********************************************************************/
|
||||
/* SetAttributeFilter() */
|
||||
/***********************************************************************/
|
||||
|
||||
// Sets (or clears, when pszFilter is null) the attribute filter.
//
// When a rewritten column list is available and the base statement starts
// with "SELECT * FROM ", the filter text is stored in m_osAttributeFilter and
// the statement is re-issued through UpdateStatement(). Otherwise the request
// is handled by OGRLayer::SetAttributeFilter().
OGRErr OGRADBCLayer::SetAttributeFilter(const char *pszFilter)
{
    const bool bForwardToBackend =
        !m_osModifiedSelect.empty() &&
        STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ");
    if (!bForwardToBackend)
        return OGRLayer::SetAttributeFilter(pszFilter);

    if (pszFilter)
        m_osAttributeFilter = pszFilter;
    else
        m_osAttributeFilter.clear();

    if (!UpdateStatement())
        return OGRERR_FAILURE;
    return OGRERR_NONE;
}
|
||||
|
||||
/************************************************************************/
|
||||
/* SetSpatialFilter() */
|
||||
/************************************************************************/
|
||||
|
||||
void OGRADBCLayer::SetSpatialFilter(int iGeomField, OGRGeometry *poGeomIn)
|
||||
|
||||
{
|
||||
if (!ValidateGeometryFieldIndexForSetSpatialFilter(iGeomField, poGeomIn))
|
||||
return;
|
||||
|
||||
if (iGeomField < GetLayerDefn()->GetGeomFieldCount())
|
||||
{
|
||||
m_iGeomFieldFilter = iGeomField;
|
||||
if (InstallFilter(poGeomIn))
|
||||
ResetReading();
|
||||
UpdateStatement();
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
/* TestCapability() */
|
||||
/************************************************************************/
|
||||
|
@ -127,16 +789,46 @@ int OGRADBCLayer::TestCapability(const char *pszCap)
|
|||
{
|
||||
if (EQUAL(pszCap, OLCFastGetArrowStream))
|
||||
{
|
||||
return !m_poFilterGeom && !m_poAttrQuery;
|
||||
return !m_poFilterGeom && !m_poAttrQuery && m_osAttributeFilter.empty();
|
||||
}
|
||||
else if (EQUAL(pszCap, OLCFastFeatureCount))
|
||||
{
|
||||
return !m_poFilterGeom && !m_poAttrQuery && m_bIsParquetLayer;
|
||||
return !m_poFilterGeom && !m_poAttrQuery &&
|
||||
m_osAttributeFilter.empty() && m_bIsParquetLayer;
|
||||
}
|
||||
else
|
||||
else if (EQUAL(pszCap, OLCFastGetExtent))
|
||||
{
|
||||
return false;
|
||||
return !m_extents.empty() && m_extents[0].IsInit();
|
||||
}
|
||||
else if (EQUAL(pszCap, OLCFastSpatialFilter) && m_iGeomFieldFilter >= 0 &&
|
||||
m_iGeomFieldFilter < GetLayerDefn()->GetGeomFieldCount())
|
||||
{
|
||||
if (m_poDS->m_bSpatialLoaded && m_poDS->m_bIsDuckDB)
|
||||
{
|
||||
const char *pszGeomColName =
|
||||
m_poAdapterLayer->m_poLayerDefn
|
||||
->GetGeomFieldDefn(m_iGeomFieldFilter)
|
||||
->GetNameRef();
|
||||
auto poTmpLayer = m_poDS->CreateInternalLayer(CPLSPrintf(
|
||||
"SELECT 1 FROM sqlite_master WHERE tbl_name = '%s' AND type = "
|
||||
"'index' AND (sql LIKE '%%USING RTREE (%s)%%' OR sql LIKE "
|
||||
"'%%USING RTREE (\"%s\")%%')",
|
||||
OGRDuplicateCharacter(GetDescription(), '\'').c_str(),
|
||||
pszGeomColName,
|
||||
OGRDuplicateCharacter(pszGeomColName, '"').c_str()));
|
||||
return poTmpLayer &&
|
||||
std::unique_ptr<OGRFeature>(poTmpLayer->GetNextFeature());
|
||||
}
|
||||
else if (!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty())
|
||||
{
|
||||
// Let's assume that the presence of a geometry bounding box
|
||||
// column is sufficient enough to pretend to have fast spatial
|
||||
// filter capabilities
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
|
@ -196,8 +888,30 @@ bool OGRADBCLayer::GetArrowStreamInternal(struct ArrowArrayStream *out_stream)
|
|||
|
||||
GIntBig OGRADBCLayer::GetFeatureCount(int bForce)
|
||||
{
|
||||
if (m_poFilterGeom || m_poAttrQuery)
|
||||
if (m_poFilterGeom || m_poAttrQuery || !m_osAttributeFilter.empty())
|
||||
{
|
||||
if (!m_osModifiedSelect.empty() &&
|
||||
STARTS_WITH_CI(m_osBaseStatement.c_str(), "SELECT * FROM ") &&
|
||||
(!m_poFilterGeom ||
|
||||
!m_geomColBBOX[m_iGeomFieldFilter].osXMin.empty() ||
|
||||
m_poDS->m_bSpatialLoaded))
|
||||
{
|
||||
const std::string osCurStatement = GetCurrentStatement();
|
||||
auto poCountLayer = m_poDS->CreateInternalLayer(
|
||||
std::string("SELECT COUNT(*) FROM (")
|
||||
.append(osCurStatement)
|
||||
.append(")")
|
||||
.c_str());
|
||||
if (poCountLayer &&
|
||||
poCountLayer->GetLayerDefn()->GetFieldCount() == 1)
|
||||
{
|
||||
auto poFeature =
|
||||
std::unique_ptr<OGRFeature>(poCountLayer->GetNextFeature());
|
||||
if (poFeature)
|
||||
return poFeature->GetFieldAsInteger64(0);
|
||||
}
|
||||
}
|
||||
|
||||
return OGRLayer::GetFeatureCount(bForce);
|
||||
}
|
||||
|
||||
|
@ -249,7 +963,7 @@ GIntBig OGRADBCLayer::GetFeatureCountParquet()
|
|||
const std::string osSQL(CPLSPrintf(
|
||||
"SELECT CAST(SUM(num_rows) AS BIGINT) FROM parquet_file_metadata('%s')",
|
||||
OGRDuplicateCharacter(m_poDS->m_osParquetFilename, '\'').c_str()));
|
||||
auto poCountLayer = m_poDS->CreateLayer(osSQL.c_str(), "numrows");
|
||||
auto poCountLayer = m_poDS->CreateInternalLayer(osSQL.c_str());
|
||||
if (poCountLayer && poCountLayer->GetLayerDefn()->GetFieldCount() == 1)
|
||||
{
|
||||
auto poFeature =
|
||||
|
|
|
@ -650,6 +650,7 @@ constexpr static const char* const apszKnownConfigOptions[] =
|
|||
"ODS_RESOLVE_FORMULAS", // from ogrodsdatasource.cpp
|
||||
"OGR2OGR_MIN_FEATURES_FOR_THREADED_REPROJ", // from ogr2ogr_lib.cpp
|
||||
"OGR2OGR_USE_ARROW_API", // from ogr2ogr_lib.cpp
|
||||
"OGR_ADBC_AUTO_LOAD_DUCKDB_SPATIAL", // from ogradbcdataset.cpp
|
||||
"OGR_API_SPY_FILE", // from ograpispy.cpp
|
||||
"OGR_API_SPY_SNAPSHOT_PATH", // from ograpispy.cpp
|
||||
"OGR_APPLY_GEOM_SET_PRECISION", // from ogr2ogr_lib.cpp, ogrlayer.cpp
|
||||
|
|
Loading…
Reference in New Issue