gdal/swig/python/gdal-utils/osgeo_utils/ogrmerge.py

1199 lines
42 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
###############################################################################
# $Id$
#
# Project: GDAL/OGR samples
# Purpose: Merge the content of several vector datasets into a single one.
# Author: Even Rouault <even dot rouault at spatialys dot com>
#
###############################################################################
# Copyright (c) 2017, Even Rouault <even dot rouault at spatialys dot com>
# Copyright (c) 2021, Idan Miara <idan@miara.com>
#
# SPDX-License-Identifier: MIT
###############################################################################
import glob
import os
import os.path
import sys
from typing import Optional, Sequence
from osgeo import gdal, ogr, osr
from osgeo_utils.auxiliary.base import PathLikeOrStr
from osgeo_utils.auxiliary.util import GetOutputDriverFor, enable_gdal_exceptions
def Usage(isError):
f = sys.stderr if isError else sys.stdout
print("Usage: ogrmerge [--help] [--help-general]", file=f)
print(" -o <out_dsname> <src_dsname> [<src_dsname>]...", file=f)
print(" [-f <format>] [-single] [-nln <layer_name_template>]", file=f)
print(" [-update | -overwrite_ds] [-append | -overwrite_layer]", file=f)
print(" [-src_geom_type <geom_type_name>[,<geom_type_name>]...]", file=f)
print(" [-dsco <NAME>=<VALUE>]... [-lco <NAME>=<VALUE>]...", file=f)
print(" [-s_srs <srs_def>] [-t_srs <srs_def>|-a_srs <srs_def>]", file=f)
print(" [-progress] [-skipfailures] [--help-general]", file=f)
print("", file=f)
print("Options specific to -single:", file=f)
print(" [-field_strategy {FirstLayer|Union|Intersection}]", file=f)
print(" [-src_layer_field_name <name>]", file=f)
print(" [-src_layer_field_content <layer_name_template>]", file=f)
print("", file=f)
print(
"* layer_name_template can contain the following substituable " "variables:",
file=f,
)
print(
" {AUTO_NAME} : {DS_BASENAME}_{LAYER_NAME} if they are " "different",
file=f,
)
print(" or {LAYER_NAME} if they are identical", file=f)
print(" {DS_NAME} : name of the source dataset", file=f)
print(" {DS_BASENAME}: base name of the source dataset", file=f)
print(" {DS_INDEX} : index of the source dataset", file=f)
print(" {LAYER_NAME} : name of the source layer", file=f)
print(" {LAYER_INDEX}: index of the source layer", file=f)
return 2 if isError else 0
#############################################################################
def _VSIFPrintfL(f, s):
gdal.VSIFWriteL(s, 1, len(s), f)
#############################################################################
def EQUAL(x, y):
return x.lower() == y.lower()
#############################################################################
def _GetGeomType(src_geom_type_name):
if EQUAL(src_geom_type_name, "GEOMETRY"):
return ogr.wkbUnknown
try:
max_geom_type = ogr.wkbTriangle
except Exception:
# GDAL 2.1 compat
max_geom_type = ogr.wkbSurface
for i in range(max_geom_type + 1):
if EQUAL(src_geom_type_name, ogr.GeometryTypeToName(i).replace(" ", "")):
return i
return None
#############################################################################
def _Esc(x):
return gdal.EscapeString(x, gdal.CPLES_XML).decode("UTF-8")
class XMLWriter:
def __init__(self, f):
self.f = f
self.inc = 0
self.elements = []
def _indent(self):
return " " * self.inc
def open_element(self, name, attrs=None):
xml_attrs = ""
if attrs is not None:
for key in attrs:
xml_attrs = xml_attrs + ' %s="%s"' % (
key,
_Esc(attrs[key].encode("utf-8")),
)
x = "%s<%s%s>\n" % (self._indent(), name, xml_attrs)
x = x.encode("utf-8")
_VSIFPrintfL(self.f, x)
self.inc = self.inc + 1
self.elements.append(name)
def write_element_value(self, name, value, attrs=None):
xml_attrs = ""
if attrs is not None:
for key in attrs:
xml_attrs = xml_attrs + ' %s="%s"' % (
key,
_Esc(attrs[key].encode("utf-8")),
)
x = "%s<%s%s>%s</%s>\n" % (
self._indent(),
name,
xml_attrs,
_Esc(value.encode("utf-8")),
name,
)
x = x.encode("utf-8")
_VSIFPrintfL(self.f, x)
def close_element(self, closing_name=None):
self.inc = self.inc - 1
name = self.elements[-1]
if closing_name is not None:
assert name == closing_name
self.elements = self.elements[0:-1]
_VSIFPrintfL(self.f, "%s</%s>\n" % (self._indent(), name))
###############################################################
# process()
@enable_gdal_exceptions
def process(argv, progress=None, progress_arg=None):
if not argv:
return Usage(isError=True)
dst_filename = None
driver_name = None
src_datasets = []
overwrite_ds = False
overwrite_layer = False
update = False
append = False
single_layer = False
layer_name_template = None
skip_failures = False
src_geom_types = []
field_strategy = None
src_layer_field_name = None
src_layer_field_content = None
a_srs = None
s_srs = None
t_srs = None
dsco = []
lco = []
# WARNING: if adding a new option, make sure to update _gpkg_ogrmerge()
# optimized code path, or use the general case.
i = 0
while i < len(argv):
arg = argv[i]
if arg == "--help":
return Usage(isError=False)
elif (arg == "-f" or arg == "-of") and i + 1 < len(argv):
i = i + 1
driver_name = argv[i]
elif arg == "-o" and i + 1 < len(argv):
i = i + 1
dst_filename = argv[i]
elif arg == "-progress":
progress = ogr.TermProgress_nocb
progress_arg = None
elif arg == "-q" or arg == "-quiet":
pass
elif arg[0:5] == "-skip":
skip_failures = True
elif arg == "-update":
update = True
elif arg == "-overwrite_ds":
overwrite_ds = True
elif arg == "-overwrite_layer":
overwrite_layer = True
update = True
elif arg == "-append":
append = True
update = True
elif arg == "-single":
single_layer = True
elif arg == "-a_srs" and i + 1 < len(argv):
i = i + 1
a_srs = argv[i]
elif arg == "-s_srs" and i + 1 < len(argv):
i = i + 1
s_srs = argv[i]
elif arg == "-t_srs" and i + 1 < len(argv):
i = i + 1
t_srs = argv[i]
elif arg == "-nln" and i + 1 < len(argv):
i = i + 1
layer_name_template = argv[i]
elif arg == "-field_strategy" and i + 1 < len(argv):
i = i + 1
field_strategy = argv[i]
elif arg == "-src_layer_field_name" and i + 1 < len(argv):
i = i + 1
src_layer_field_name = argv[i]
elif arg == "-src_layer_field_content" and i + 1 < len(argv):
i = i + 1
src_layer_field_content = argv[i]
elif arg == "-dsco" and i + 1 < len(argv):
i = i + 1
dsco.append(argv[i])
elif arg == "-lco" and i + 1 < len(argv):
i = i + 1
lco.append(argv[i])
elif arg == "-src_geom_type" and i + 1 < len(argv):
i = i + 1
src_geom_type_names = argv[i].split(",")
for src_geom_type_name in src_geom_type_names:
src_geom_type = _GetGeomType(src_geom_type_name)
if src_geom_type is None:
print(
"ERROR: Unrecognized geometry type: %s" % src_geom_type_name,
file=sys.stderr,
)
return 1
src_geom_types.append(src_geom_type)
elif arg[0] == "-":
print("ERROR: Unrecognized argument : %s" % arg, file=sys.stderr)
return Usage(isError=True)
else:
if "*" in arg:
src_datasets += glob.glob(arg)
else:
src_datasets.append(arg)
i = i + 1
if dst_filename is None:
print("Missing -o", file=sys.stderr)
return 1
return ogrmerge(
src_datasets=src_datasets,
dst_filename=dst_filename,
driver_name=driver_name,
overwrite_ds=overwrite_ds,
overwrite_layer=overwrite_layer,
update=update,
append=append,
single_layer=single_layer,
layer_name_template=layer_name_template,
skip_failures=skip_failures,
src_geom_types=src_geom_types,
field_strategy=field_strategy,
src_layer_field_name=src_layer_field_name,
src_layer_field_content=src_layer_field_content,
a_srs=a_srs,
s_srs=s_srs,
t_srs=t_srs,
dsco=dsco,
lco=lco,
progress_callback=progress,
progress_arg=progress_arg,
)
#############################################################################
def _build_layer_name_non_single_mode(
layer_name_template,
src_ds_idx,
src_dsname,
src_lyr_idx,
src_lyr_name,
skip_failures,
):
layer_name = layer_name_template
basename = None
if os.path.exists(src_dsname):
basename = os.path.basename(src_dsname)
if "." in basename:
basename = ".".join(basename.split(".")[0:-1])
if basename == src_lyr_name:
layer_name = layer_name.replace("{AUTO_NAME}", basename)
elif basename is None:
layer_name = layer_name.replace(
"{AUTO_NAME}", "Dataset%d_%s" % (src_ds_idx, src_lyr_name)
)
else:
layer_name = layer_name.replace("{AUTO_NAME}", basename + "_" + src_lyr_name)
if basename is not None:
layer_name = layer_name.replace("{DS_BASENAME}", basename)
elif "{DS_BASENAME}" in layer_name:
if skip_failures:
if "{DS_INDEX}" not in layer_name:
layer_name = layer_name.replace(
"{DS_BASENAME}", "Dataset%d" % src_ds_idx
)
else:
print(
"ERROR: Layer name template %s "
"includes {DS_BASENAME} "
"but %s is not a file" % (layer_name_template, src_dsname),
file=sys.stderr,
)
return None
layer_name = layer_name.replace("{DS_NAME}", "%s" % src_dsname)
layer_name = layer_name.replace("{DS_INDEX}", "%d" % src_ds_idx)
layer_name = layer_name.replace("{LAYER_NAME}", src_lyr_name)
layer_name = layer_name.replace("{LAYER_INDEX}", "%d" % src_lyr_idx)
return layer_name
#############################################################################
def _quote_literal(x):
return x.replace("'", "''")
#############################################################################
def _quote_id(x):
return x.replace('"', '""')
#############################################################################
def _gpkg_get_src_table_size(src_ds, table_name):
try:
# sqlite >= 3.31
sql_lyr = src_ds.ExecuteSQL(
"SELECT pgsize FROM temp.stat WHERE name = '%s' AND aggregate = TRUE"
% _quote_literal(table_name)
)
except Exception:
sql_lyr = src_ds.ExecuteSQL(
"SELECT SUM(pgsize) FROM temp.stat WHERE name = '%s'"
% _quote_literal(table_name)
)
f = sql_lyr.GetNextFeature()
src_table_size = f.GetField(0)
src_ds.ReleaseResultSet(sql_lyr)
return src_table_size
#############################################################################
def _gpkg_has_spatial_index(ds, lyr):
has_spatial_index = False
if lyr.GetGeomType() != ogr.wkbNone:
sql_lyr = ds.ExecuteSQL(
"SELECT HasSpatialIndex('%s', '%s')"
% (_quote_literal(lyr.GetName()), _quote_literal(lyr.GetGeometryColumn()))
)
f = sql_lyr.GetNextFeature()
has_spatial_index = f.GetField(0)
ds.ReleaseResultSet(sql_lyr)
return has_spatial_index
#############################################################################
# Estimate the final .gpkg file size from the contributing source layers
def _gpkg_get_estimated_final_size(
src_datasets, src_geom_types, can_reuse_spatial_index
):
estimated_final_size = 0
for src_dsname in src_datasets:
src_ds = ogr.Open(src_dsname)
if src_ds is None:
continue
dbstat_available = False
try:
src_ds.ExecuteSQL("CREATE VIRTUAL TABLE temp.stat USING dbstat(main)")
dbstat_available = True
except Exception:
pass
use_src_file_size = True
if dbstat_available:
for src_lyr in src_ds:
if src_geom_types:
gt = ogr.GT_Flatten(src_lyr.GetGeomType())
if gt not in src_geom_types:
use_src_file_size = False
break
if not can_reuse_spatial_index and _gpkg_has_spatial_index(
src_ds, src_lyr
):
use_src_file_size = False
break
if use_src_file_size:
# Wrong if not all layers are selected...
estimated_final_size += gdal.VSIStatL(src_dsname).size
else:
for src_lyr in src_ds:
if src_geom_types:
gt = ogr.GT_Flatten(src_lyr.GetGeomType())
if gt not in src_geom_types:
continue
estimated_final_size += _gpkg_get_src_table_size(
src_ds, src_lyr.GetName()
)
if can_reuse_spatial_index and _gpkg_has_spatial_index(src_ds, src_lyr):
src_rtree_prefix = "rtree_%s_%s" % (
src_lyr.GetName(),
src_lyr.GetGeometryColumn(),
)
estimated_final_size += _gpkg_get_src_table_size(
src_ds, src_rtree_prefix + "_node"
)
estimated_final_size += _gpkg_get_src_table_size(
src_ds, src_rtree_prefix + "_rowid"
)
estimated_final_size += _gpkg_get_src_table_size(
src_ds, src_rtree_prefix + "_parent"
)
return estimated_final_size
#############################################################################
def _gpkg_get_srs_id(ds, lyr):
sql = (
"SELECT srs_id FROM gpkg_geometry_columns WHERE table_name = '%s'"
% _quote_literal(lyr.GetName())
)
sql_lyr = ds.ExecuteSQL(sql)
f = sql_lyr.GetNextFeature()
srs_id = f.GetField(0)
ds.ReleaseResultSet(sql_lyr)
return srs_id
#############################################################################
# Optimized implementation of general case for geopackage output, that can be used only:
# - in non-single mode
# - when all sources are geopackages
# - for a newly created dataset
def _gpkg_ogrmerge(
src_datasets: Optional[Sequence[str]] = None,
dst_filename: Optional[PathLikeOrStr] = None,
driver_name: Optional[str] = None,
layer_name_template: Optional[str] = None,
skip_failures: bool = False,
src_geom_types: Optional[Sequence[int]] = None,
a_srs: Optional[str] = None,
s_srs: Optional[str] = None,
t_srs: Optional[str] = None,
dsco: Optional[Sequence[str]] = None,
lco: Optional[Sequence[str]] = None,
progress_callback: Optional = None,
progress_arg: Optional = None,
):
driver_name = "GPKG"
drv = gdal.GetDriverByName(driver_name)
if drv is None:
print("ERROR: Invalid driver: %s" % driver_name, file=sys.stderr)
return 1
dst_ds = drv.Create(dst_filename, 0, 0, 0, gdal.GDT_Unknown, dsco)
if dst_ds is None:
return 1
class ThreadedProgress:
def __init__(self, dst_filename, estimated_final_size):
self.stop_thread = False
import time
from threading import Thread
def myfunc():
while not self.stop_thread:
dst_file_size = gdal.VSIStatL(dst_filename).size
pct = min(1.0, dst_file_size / estimated_final_size)
progress_callback(pct, "", progress_arg)
time.sleep(0.1)
t = Thread(target=myfunc)
t.start()
def stop(self):
self.stop_thread = True
create_spatial_index = "SPATIAL_INDEX=NO" not in [x.upper() for x in lco]
can_reuse_spatial_index = t_srs is None and create_spatial_index
estimated_final_size = 0
if progress_callback:
estimated_final_size = _gpkg_get_estimated_final_size(
src_datasets, src_geom_types, can_reuse_spatial_index
)
for src_ds_idx, src_dsname in enumerate(src_datasets):
src_ds = ogr.Open(src_dsname)
if src_ds is None:
print("ERROR: Cannot open %s" % src_dsname, file=sys.stderr)
if skip_failures:
continue
return 1
for src_lyr_idx, src_lyr in enumerate(src_ds):
if src_geom_types:
gt = ogr.GT_Flatten(src_lyr.GetGeomType())
if gt not in src_geom_types:
continue
has_geom = src_lyr.GetGeomType() != ogr.wkbNone
if has_geom and not any(
opt.upper().startswith("GEOMETRY_NAME=") for opt in lco
):
modified_lco = ["GEOMETRY_NAME=" + src_lyr.GetGeometryColumn()] + lco
else:
modified_lco = lco
if t_srs and has_geom:
srs = osr.SpatialReference()
srs.SetFromUserInput(t_srs)
elif a_srs and has_geom:
srs = osr.SpatialReference()
srs.SetFromUserInput(a_srs)
else:
srs = src_lyr.GetSpatialRef()
layer_name = _build_layer_name_non_single_mode(
layer_name_template,
src_ds_idx,
src_dsname,
src_lyr_idx,
src_lyr.GetName(),
skip_failures,
)
if layer_name is None:
return 1
lyr = dst_ds.CreateLayer(
layer_name,
geom_type=src_lyr.GetGeomType(),
srs=srs,
options=modified_lco,
)
for field_idx in range(src_lyr.GetLayerDefn().GetFieldCount()):
lyr.CreateField(src_lyr.GetLayerDefn().GetFieldDefn(field_idx))
md = src_lyr.GetMetadata()
if md:
lyr.SetMetadata(md)
lyr.SyncToDisk()
if has_geom:
src_srs_id = _gpkg_get_srs_id(src_ds, src_lyr)
dst_srs_id = _gpkg_get_srs_id(dst_ds, lyr)
rtree_prefix = "rtree_%s_%s" % (lyr.GetName(), lyr.GetGeometryColumn())
else:
rtree_prefix = "__invalid__"
src_srs_id = -1
dst_srs_id = -1
# Collect triggers that we can safely temporary disable, and drop them
sql = (
"SELECT name, sql FROM sqlite_master WHERE type = 'trigger' AND (name LIKE '%s_%%' OR name LIKE 'trigger_insert_feature_count_%s' OR name LIKE 'trigger_delete_feature_count_%s')"
% (
_quote_literal(rtree_prefix),
_quote_literal(lyr.GetName()),
_quote_literal(lyr.GetName()),
)
)
triggers = []
sql_lyr = dst_ds.ExecuteSQL(sql)
for f in sql_lyr:
trigger_name = f["name"]
dst_ds.ExecuteSQL('DROP TRIGGER "%s"' % _quote_id(trigger_name))
triggers.append(f["sql"])
dst_ds.ReleaseResultSet(sql_lyr)
def get_normalized_field_names(ds, lyr):
fields = []
sql_lyr = ds.ExecuteSQL(
"PRAGMA table_info('%s')" % _quote_literal(lyr.GetName())
)
for f in sql_lyr:
col_name = f["name"]
if col_name == lyr.GetFIDColumn():
fields.append("__FID__")
elif col_name == lyr.GetGeometryColumn():
fields.append("__GEOMETRY_COLUMN__")
else:
fields.append(col_name)
ds.ReleaseResultSet(sql_lyr)
return fields
# Build list of field names to select
if (
src_srs_id == dst_srs_id
and s_srs is None
and t_srs is None
and get_normalized_field_names(src_ds, src_lyr)
== get_normalized_field_names(dst_ds, lyr)
):
# If fields in source and target layers are ordered the same, using * is slightly faster
# than selecting individual fields
fields = "*"
else:
fields = '"%s"' % _quote_id(src_lyr.GetFIDColumn())
if has_geom:
if src_srs_id == dst_srs_id:
fields += ', "%s"' % _quote_id(src_lyr.GetGeometryColumn())
elif t_srs:
# Reproject
if s_srs:
s_srs_obj = osr.SpatialReference()
s_srs_obj.SetFromUserInput(s_srs)
assert s_srs_obj.GetAuthorityName(None) == "EPSG"
src_srs_id = int(s_srs_obj.GetAuthorityCode(None))
fields += ', ST_Transform(SetSRID("%s", %d), %d)' % (
_quote_id(src_lyr.GetGeometryColumn()),
src_srs_id,
dst_srs_id,
)
else:
fields += ', ST_Transform("%s", %d)' % (
_quote_id(src_lyr.GetGeometryColumn()),
dst_srs_id,
)
else:
# Just remap the geometry SRID to the one of the destination dataset
fields += ', SetSRID("%s", %d)' % (
_quote_id(src_lyr.GetGeometryColumn()),
dst_srs_id,
)
for field_idx in range(src_lyr.GetLayerDefn().GetFieldCount()):
fields += ', "%s"' % _quote_id(
src_lyr.GetLayerDefn().GetFieldDefn(field_idx).GetName()
)
dst_ds.ExecuteSQL(
"ATTACH DATABASE '%s' AS source_db" % _quote_literal(src_dsname)
)
threaded_progress = None
if progress_callback:
threaded_progress = ThreadedProgress(dst_filename, estimated_final_size)
# Copy features
try:
dst_ds.ExecuteSQL(
'INSERT INTO "%s" SELECT %s FROM source_db."%s"'
% (_quote_id(lyr.GetName()), fields, _quote_id(src_lyr.GetName()))
)
finally:
if threaded_progress:
threaded_progress.stop()
# Update gpkg_ogr_contents
sql_lyr = dst_ds.ExecuteSQL("SELECT changes()")
f = sql_lyr.GetNextFeature()
num_rows_inserted = f.GetField(0)
f = None
dst_ds.ReleaseResultSet(sql_lyr)
src_feature_count = src_lyr.GetFeatureCount(force=0)
if src_feature_count >= 0 and num_rows_inserted != src_feature_count:
print(
"Warning: %d rows inserted into %s whereas %d expected"
% (num_rows_inserted, lyr.GetName(), src_feature_count),
file=sys.stderr,
)
dst_ds.ExecuteSQL(
"INSERT OR REPLACE INTO gpkg_ogr_contents VALUES('%s',%d)"
% (_quote_literal(lyr.GetName()), num_rows_inserted)
)
recreateSpatialIndex = False
if has_geom and create_spatial_index:
if can_reuse_spatial_index and _gpkg_has_spatial_index(src_ds, src_lyr):
# print("Copying spatial index")
src_rtree_prefix = "rtree_%s_%s" % (
src_lyr.GetName(),
src_lyr.GetGeometryColumn(),
)
if progress_callback:
threaded_progress = ThreadedProgress(
dst_filename, estimated_final_size
)
try:
dst_ds.ExecuteSQL(
'DELETE FROM "%s_node" ' % _quote_id(rtree_prefix)
)
dst_ds.ExecuteSQL(
'INSERT INTO "%s_node" SELECT * FROM source_db."%s_node"'
% (_quote_id(rtree_prefix), _quote_id(src_rtree_prefix))
)
dst_ds.ExecuteSQL(
'INSERT INTO "%s_rowid" SELECT * FROM source_db."%s_rowid"'
% (_quote_id(rtree_prefix), _quote_id(src_rtree_prefix))
)
dst_ds.ExecuteSQL(
'INSERT INTO "%s_parent" SELECT * FROM source_db."%s_parent"'
% (_quote_id(rtree_prefix), _quote_id(src_rtree_prefix))
)
finally:
if threaded_progress:
threaded_progress.stop()
else:
recreateSpatialIndex = True
dst_ds.ExecuteSQL("DETACH DATABASE source_db")
# Manually register gpkg_geom_* extensions, if not already done
# at layer creation time.
sql_lyr = src_ds.ExecuteSQL(
"SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'gpkg_extensions'"
)
has_gpkg_extensions = sql_lyr.GetFeatureCount() == 1
src_ds.ReleaseResultSet(sql_lyr)
if has_geom and has_gpkg_extensions:
sql = (
"SELECT extension_name FROM gpkg_extensions WHERE table_name = '%s' AND column_name = '%s' AND extension_name LIKE 'gpkg_geom_%%'"
% (
_quote_literal(src_lyr.GetName()),
_quote_literal(src_lyr.GetGeometryColumn()),
)
)
sql_lyr = src_ds.ExecuteSQL(sql)
for f in sql_lyr:
geom_type = f.GetField(0)[len("gpkg_geom_") :]
dst_ds.ReleaseResultSet(
dst_ds.ExecuteSQL(
"SELECT RegisterGeometryExtension('%s', '%s', '%s')"
% (lyr.GetName(), lyr.GetGeometryColumn(), geom_type)
)
)
src_ds.ReleaseResultSet(sql_lyr)
# Update extent
if has_geom:
res = src_lyr.GetExtent(force=1, can_return_null=True)
if res:
minx, maxx, miny, maxy = res
sql = (
"UPDATE gpkg_contents SET min_x=%.18g, min_y=%.18g, max_x=%.18g, max_y=%.18g WHERE table_name = '%s'"
% (minx, miny, maxx, maxy, _quote_literal(lyr.GetName()))
)
dst_ds.ExecuteSQL(sql)
# Re-install triggers
for sql in triggers:
dst_ds.ExecuteSQL(sql)
if recreateSpatialIndex:
# print("Recreating spatial index")
dst_ds.ReleaseResultSet(
dst_ds.ExecuteSQL(
"SELECT DisableSpatialIndex('%s', '%s')"
% (
_quote_literal(lyr.GetName()),
_quote_literal(lyr.GetGeometryColumn()),
)
)
)
dst_ds.ReleaseResultSet(
dst_ds.ExecuteSQL(
"SELECT CreateSpatialIndex('%s', '%s')"
% (
_quote_literal(lyr.GetName()),
_quote_literal(lyr.GetGeometryColumn()),
)
)
)
if progress_callback:
progress_callback(1.0, "", progress_arg)
return 0
def ogrmerge(
src_datasets: Optional[Sequence[str]] = None,
dst_filename: Optional[PathLikeOrStr] = None,
driver_name: Optional[str] = None,
overwrite_ds: bool = False,
overwrite_layer: bool = False,
update: bool = False,
append: bool = False,
single_layer: bool = False,
layer_name_template: Optional[str] = None,
skip_failures: bool = False,
src_geom_types: Optional[Sequence[int]] = None,
field_strategy: Optional[str] = None,
src_layer_field_name: Optional[str] = None,
src_layer_field_content: Optional[str] = None,
a_srs: Optional[str] = None,
s_srs: Optional[str] = None,
t_srs: Optional[str] = None,
dsco: Optional[Sequence[str]] = None,
lco: Optional[Sequence[str]] = None,
progress_callback: Optional = None,
progress_arg: Optional = None,
):
src_datasets = src_datasets or []
src_geom_types = src_geom_types or []
dsco = dsco or []
lco = lco or []
if update:
if driver_name is not None:
print("ERROR: -f incompatible with -update", file=sys.stderr)
return 1
if dsco:
print("ERROR: -dsco incompatible with -update", file=sys.stderr)
return 1
driver_name = ""
else:
if driver_name is None:
driver_name = GetOutputDriverFor(dst_filename, is_raster=False)
if src_layer_field_content is None:
src_layer_field_content = "{AUTO_NAME}"
elif src_layer_field_name is None:
src_layer_field_name = "source_ds_lyr"
if (
not single_layer
and driver_name == "ESRI Shapefile"
and dst_filename.lower().endswith(".shp")
):
print(
"ERROR: Non-single layer mode incompatible with non-directory "
"shapefile output",
file=sys.stderr,
)
return 1
if not src_datasets:
print("ERROR: No source datasets", file=sys.stderr)
return 1
if layer_name_template is None:
if single_layer:
layer_name_template = "merged"
else:
layer_name_template = "{AUTO_NAME}"
def get_vector_file_in_update_no_exception(filename):
with gdal.ExceptionMgr(useExceptions=False), gdal.quiet_errors():
return gdal.OpenEx(filename, gdal.OF_VECTOR | gdal.OF_UPDATE)
if (
not single_layer
and EQUAL(driver_name, "GPKG")
and get_vector_file_in_update_no_exception(dst_filename) is None
and EQUAL(gdal.GetConfigOption("OGR_MERGE_ENABLE_GPKG_OPTIM", "YES"), "YES")
):
def are_sources_gpkg():
for src_dsname in src_datasets:
if src_dsname.lower().endswith(".gpkg"):
src_ds = ogr.Open(src_dsname)
if src_ds is None:
return False
for src_lyr in src_ds:
if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
# shouldn't happen for now...
print(
"Code is not ready for multi-geometry column GPKG",
file=sys.stderr,
)
return False
else:
return False
return True
compat_of_gpkg_optim = are_sources_gpkg()
if compat_of_gpkg_optim:
if s_srs:
s_srs_obj = osr.SpatialReference()
s_srs_obj.SetFromUserInput(s_srs)
if s_srs_obj.GetAuthorityName(None) != "EPSG":
compat_of_gpkg_optim = False
if compat_of_gpkg_optim:
return _gpkg_ogrmerge(
src_datasets,
dst_filename,
driver_name,
layer_name_template,
skip_failures,
src_geom_types,
a_srs,
s_srs,
t_srs,
dsco,
lco,
progress_callback,
progress_arg,
)
vrt_filename = None
if not EQUAL(driver_name, "VRT"):
dst_ds = get_vector_file_in_update_no_exception(dst_filename)
if dst_ds is not None:
if not update and not overwrite_ds:
print(
"ERROR: Destination dataset already exists, "
+ "but -update nor -overwrite_ds are specified",
file=sys.stderr,
)
return 1
if overwrite_ds:
drv = dst_ds.GetDriver()
dst_ds = None
if drv.GetDescription() == "OGR_VRT":
# We don't want to destroy the sources of the VRT
gdal.Unlink(dst_filename)
else:
drv.Delete(dst_filename)
elif update:
print("ERROR: Destination dataset does not exist", file=sys.stderr)
return 1
if dst_ds is None:
drv = gdal.GetDriverByName(driver_name)
if drv is None:
print("ERROR: Invalid driver: %s" % driver_name, file=sys.stderr)
return 1
dst_ds = drv.Create(dst_filename, 0, 0, 0, gdal.GDT_Unknown, dsco)
if dst_ds is None:
return 1
vrt_filename = "/vsimem/_ogrmerge_.vrt"
else:
if gdal.VSIStatL(dst_filename) and not overwrite_ds:
print(
"ERROR: Destination dataset already exists, "
+ "but -overwrite_ds are specified",
file=sys.stderr,
)
return 1
vrt_filename = dst_filename
f = gdal.VSIFOpenL(vrt_filename, "wb")
if f is None:
print("ERROR: Cannot create %s" % vrt_filename, file=sys.stderr)
return 1
writer = XMLWriter(f)
writer.open_element("OGRVRTDataSource")
if single_layer:
ogr_vrt_union_layer_written = False
for src_ds_idx, src_dsname in enumerate(src_datasets):
src_ds = ogr.Open(src_dsname)
if src_ds is None:
print("ERROR: Cannot open %s" % src_dsname, file=sys.stderr)
if skip_failures:
continue
gdal.VSIFCloseL(f)
gdal.Unlink(vrt_filename)
return 1
for src_lyr_idx, src_lyr in enumerate(src_ds):
if src_geom_types:
gt = ogr.GT_Flatten(src_lyr.GetGeomType())
if gt not in src_geom_types:
continue
if not ogr_vrt_union_layer_written:
ogr_vrt_union_layer_written = True
writer.open_element(
"OGRVRTUnionLayer", attrs={"name": layer_name_template}
)
if src_layer_field_name is not None:
writer.write_element_value(
"SourceLayerFieldName", src_layer_field_name
)
if field_strategy is not None:
writer.write_element_value("FieldStrategy", field_strategy)
layer_name = src_layer_field_content
src_lyr_name = src_lyr.GetName()
try:
src_lyr_name = src_lyr_name.decode("utf-8")
except AttributeError:
pass
basename = None
if os.path.exists(src_dsname):
basename = os.path.basename(src_dsname)
if "." in basename:
basename = ".".join(basename.split(".")[0:-1])
if basename == src_lyr_name:
layer_name = layer_name.replace("{AUTO_NAME}", basename)
elif basename is None:
layer_name = layer_name.replace(
"{AUTO_NAME}", "Dataset%d_%s" % (src_ds_idx, src_lyr_name)
)
else:
layer_name = layer_name.replace(
"{AUTO_NAME}", basename + "_" + src_lyr_name
)
if basename is not None:
layer_name = layer_name.replace("{DS_BASENAME}", basename)
else:
layer_name = layer_name.replace("{DS_BASENAME}", src_dsname)
layer_name = layer_name.replace("{DS_NAME}", "%s" % src_dsname)
layer_name = layer_name.replace("{DS_INDEX}", "%d" % src_ds_idx)
layer_name = layer_name.replace("{LAYER_NAME}", src_lyr_name)
layer_name = layer_name.replace("{LAYER_INDEX}", "%d" % src_lyr_idx)
if t_srs is not None:
writer.open_element("OGRVRTWarpedLayer")
writer.open_element("OGRVRTLayer", attrs={"name": layer_name})
attrs = {}
if (
EQUAL(driver_name, "VRT")
and os.path.exists(src_dsname)
and not os.path.isabs(src_dsname)
and "/" not in vrt_filename
and "\\" not in vrt_filename
):
attrs["relativeToVRT"] = "1"
if single_layer:
attrs["shared"] = "1"
writer.write_element_value("SrcDataSource", src_dsname, attrs=attrs)
writer.write_element_value("SrcLayer", src_lyr.GetName())
if a_srs is not None:
writer.write_element_value("LayerSRS", a_srs)
writer.close_element("OGRVRTLayer")
if t_srs is not None:
if s_srs is not None:
writer.write_element_value("SrcSRS", s_srs)
writer.write_element_value("TargetSRS", t_srs)
writer.close_element("OGRVRTWarpedLayer")
if ogr_vrt_union_layer_written:
writer.close_element("OGRVRTUnionLayer")
else:
for src_ds_idx, src_dsname in enumerate(src_datasets):
src_ds = ogr.Open(src_dsname)
if src_ds is None:
print("ERROR: Cannot open %s" % src_dsname, file=sys.stderr)
if skip_failures:
continue
gdal.VSIFCloseL(f)
gdal.Unlink(vrt_filename)
return 1
for src_lyr_idx, src_lyr in enumerate(src_ds):
if src_geom_types:
gt = ogr.GT_Flatten(src_lyr.GetGeomType())
if gt not in src_geom_types:
continue
src_lyr_name = src_lyr.GetName()
try:
src_lyr_name = src_lyr_name.decode("utf-8")
except AttributeError:
pass
layer_name = _build_layer_name_non_single_mode(
layer_name_template,
src_ds_idx,
src_dsname,
src_lyr_idx,
src_lyr.GetName(),
skip_failures,
)
if layer_name is None:
gdal.VSIFCloseL(f)
gdal.Unlink(vrt_filename)
return 1
if t_srs is not None:
writer.open_element("OGRVRTWarpedLayer")
writer.open_element("OGRVRTLayer", attrs={"name": layer_name})
attrs = {}
if (
EQUAL(driver_name, "VRT")
and os.path.exists(src_dsname)
and not os.path.isabs(src_dsname)
and "/" not in vrt_filename
and "\\" not in vrt_filename
):
attrs["relativeToVRT"] = "1"
if single_layer:
attrs["shared"] = "1"
writer.write_element_value("SrcDataSource", src_dsname, attrs=attrs)
writer.write_element_value("SrcLayer", src_lyr_name)
if a_srs is not None:
writer.write_element_value("LayerSRS", a_srs)
writer.close_element("OGRVRTLayer")
if t_srs is not None:
if s_srs is not None:
writer.write_element_value("SrcSRS", s_srs)
writer.write_element_value("TargetSRS", t_srs)
writer.close_element("OGRVRTWarpedLayer")
writer.close_element("OGRVRTDataSource")
gdal.VSIFCloseL(f)
ret = 0
if not EQUAL(driver_name, "VRT"):
accessMode = None
if append:
accessMode = "append"
elif overwrite_layer:
accessMode = "overwrite"
ret = gdal.VectorTranslate(
dst_ds,
vrt_filename,
accessMode=accessMode,
layerCreationOptions=lco,
skipFailures=skip_failures,
callback=progress_callback,
callback_data=progress_arg,
)
if ret == 1:
ret = 0
else:
ret = 1
gdal.Unlink(vrt_filename)
return ret
def main(argv=sys.argv):
argv = ogr.GeneralCmdLineProcessor(argv)
if argv is None:
return 0
return process(argv[1:])
if __name__ == "__main__":
sys.exit(main(sys.argv))