Merge 027359cfc4
into 5a8e65c432
This commit is contained in:
commit
44cd5be49a
|
@ -1,6 +1,3 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
|
@ -14,35 +11,49 @@ from pymysqlreplication.row_event import (
|
||||||
DeleteRowsEvent,
|
DeleteRowsEvent,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
PY3PLUS = sys.version_info > (3,)
|
||||||
|
|
||||||
if sys.version > '3':
|
def is_valid_datetime(date_string):
|
||||||
PY3PLUS = True
|
"""
|
||||||
else:
|
Check if a string is a valid datetime format.
|
||||||
PY3PLUS = False
|
|
||||||
|
|
||||||
|
:param date_string: The datetime string to check.
|
||||||
def is_valid_datetime(string):
|
:return: True if valid, False otherwise.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
datetime.datetime.strptime(string, "%Y-%m-%d %H:%M:%S")
|
datetime.datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
|
||||||
return True
|
return True
|
||||||
except:
|
except ValueError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def create_unique_file(filename):
|
def create_unique_file(filename):
|
||||||
|
"""
|
||||||
|
Create a unique file by appending an index if it already exists.
|
||||||
|
|
||||||
|
:param filename: The base filename.
|
||||||
|
:return: A unique filename.
|
||||||
|
:raises: OSError if unable to create a unique filename.
|
||||||
|
"""
|
||||||
version = 0
|
version = 0
|
||||||
result_file = filename
|
result_file = filename
|
||||||
# if we have to try more than 1000 times, something is seriously wrong
|
# If we try more than 1000 times, raise an exception
|
||||||
while os.path.exists(result_file) and version < 1000:
|
while os.path.exists(result_file) and version < 1000:
|
||||||
result_file = filename + '.' + str(version)
|
result_file = f"{filename}.{version}"
|
||||||
version += 1
|
version += 1
|
||||||
if version >= 1000:
|
if version >= 1000:
|
||||||
raise OSError('cannot create unique file %s.[0-1000]' % filename)
|
raise OSError(f'Cannot create unique file {filename}.[0-1000]')
|
||||||
return result_file
|
return result_file
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def temp_open(filename, mode):
|
def temp_open(filename, mode):
|
||||||
|
"""
|
||||||
|
Open a file temporarily and ensure it's removed after use.
|
||||||
|
|
||||||
|
:param filename: Name of the file to open.
|
||||||
|
:param mode: Mode to open the file in.
|
||||||
|
"""
|
||||||
f = open(filename, mode)
|
f = open(filename, mode)
|
||||||
try:
|
try:
|
||||||
yield f
|
yield f
|
||||||
|
@ -52,216 +63,184 @@ def temp_open(filename, mode):
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
"""parse args for binlog2sql"""
|
"""
|
||||||
|
Parse command line arguments for the binlog2sql script.
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='Parse MySQL binlog to SQL you want', add_help=False)
|
:return: Parsed arguments.
|
||||||
connect_setting = parser.add_argument_group('connect setting')
|
"""
|
||||||
connect_setting.add_argument('-h', '--host', dest='host', type=str,
|
parser = argparse.ArgumentParser(description='Parse MySQL binlog to SQL', add_help=False)
|
||||||
help='Host the MySQL database server located', default='127.0.0.1')
|
connect_setting = parser.add_argument_group('Connect Settings')
|
||||||
connect_setting.add_argument('-u', '--user', dest='user', type=str,
|
connect_setting.add_argument('-h', '--host', default='127.0.0.1', help='MySQL host')
|
||||||
help='MySQL Username to log in as', default='root')
|
connect_setting.add_argument('-u', '--user', default='root', help='MySQL user')
|
||||||
connect_setting.add_argument('-p', '--password', dest='password', type=str, nargs='*',
|
connect_setting.add_argument('-p', '--password', nargs='*', default='', help='MySQL password')
|
||||||
help='MySQL Password to use', default='')
|
connect_setting.add_argument('-P', '--port', default=3306, type=int, help='MySQL port')
|
||||||
connect_setting.add_argument('-P', '--port', dest='port', type=int,
|
|
||||||
help='MySQL port to use', default=3306)
|
|
||||||
interval = parser.add_argument_group('interval filter')
|
|
||||||
interval.add_argument('--start-file', dest='start_file', type=str, help='Start binlog file to be parsed')
|
|
||||||
interval.add_argument('--start-position', '--start-pos', dest='start_pos', type=int,
|
|
||||||
help='Start position of the --start-file', default=4)
|
|
||||||
interval.add_argument('--stop-file', '--end-file', dest='end_file', type=str,
|
|
||||||
help="Stop binlog file to be parsed. default: '--start-file'", default='')
|
|
||||||
interval.add_argument('--stop-position', '--end-pos', dest='end_pos', type=int,
|
|
||||||
help="Stop position. default: latest position of '--stop-file'", default=0)
|
|
||||||
interval.add_argument('--start-datetime', dest='start_time', type=str,
|
|
||||||
help="Start time. format %%Y-%%m-%%d %%H:%%M:%%S", default='')
|
|
||||||
interval.add_argument('--stop-datetime', dest='stop_time', type=str,
|
|
||||||
help="Stop Time. format %%Y-%%m-%%d %%H:%%M:%%S;", default='')
|
|
||||||
parser.add_argument('--stop-never', dest='stop_never', action='store_true', default=False,
|
|
||||||
help="Continuously parse binlog. default: stop at the latest event when you start.")
|
|
||||||
parser.add_argument('--help', dest='help', action='store_true', help='help information', default=False)
|
|
||||||
|
|
||||||
schema = parser.add_argument_group('schema filter')
|
interval = parser.add_argument_group('Interval Filter')
|
||||||
schema.add_argument('-d', '--databases', dest='databases', type=str, nargs='*',
|
interval.add_argument('--start-file', help='Start binlog file')
|
||||||
help='dbs you want to process', default='')
|
interval.add_argument('--start-position', '--start-pos', default=4, type=int, help='Start position')
|
||||||
schema.add_argument('-t', '--tables', dest='tables', type=str, nargs='*',
|
interval.add_argument('--stop-file', '--end-file', default='', help='End binlog file')
|
||||||
help='tables you want to process', default='')
|
interval.add_argument('--stop-position', '--end-pos', default=0, type=int, help='End position')
|
||||||
|
interval.add_argument('--start-datetime', help="Start time in '%%Y-%%m-%%d %%H:%%M:%%S' format")
|
||||||
|
interval.add_argument('--stop-datetime', help="Stop time in '%%Y-%%m-%%d %%H:%%M:%%S' format")
|
||||||
|
parser.add_argument('--stop-never', action='store_true', default=False, help="Continuously parse binlog")
|
||||||
|
|
||||||
event = parser.add_argument_group('type filter')
|
schema = parser.add_argument_group('Schema Filter')
|
||||||
event.add_argument('--only-dml', dest='only_dml', action='store_true', default=False,
|
schema.add_argument('-d', '--databases', nargs='*', default='', help='Databases to process')
|
||||||
help='only print dml, ignore ddl')
|
schema.add_argument('-t', '--tables', nargs='*', default='', help='Tables to process')
|
||||||
event.add_argument('--sql-type', dest='sql_type', type=str, nargs='*', default=['INSERT', 'UPDATE', 'DELETE'],
|
|
||||||
help='Sql type you want to process, support INSERT, UPDATE, DELETE.')
|
event = parser.add_argument_group('Event Type Filter')
|
||||||
|
event.add_argument('--only-dml', action='store_true', default=False, help='Only process DML events (ignore DDL)')
|
||||||
|
event.add_argument('--sql-type', nargs='*', default=['INSERT', 'UPDATE', 'DELETE'],
|
||||||
|
help='SQL types to process (INSERT, UPDATE, DELETE)')
|
||||||
|
|
||||||
|
parser.add_argument('-K', '--no-primary-key', action='store_true', default=False,
|
||||||
|
help='Generate insert SQL without primary key if exists')
|
||||||
|
parser.add_argument('-B', '--flashback', action='store_true', default=False,
|
||||||
|
help='Flashback data to start position of start file')
|
||||||
|
parser.add_argument('--back-interval', type=float, default=1.0,
|
||||||
|
help="Sleep time between chunks of 1000 rollback SQL")
|
||||||
|
parser.add_argument('--help', action='store_true', help='Show help')
|
||||||
|
|
||||||
# exclusive = parser.add_mutually_exclusive_group()
|
|
||||||
parser.add_argument('-K', '--no-primary-key', dest='no_pk', action='store_true',
|
|
||||||
help='Generate insert sql without primary key if exists', default=False)
|
|
||||||
parser.add_argument('-B', '--flashback', dest='flashback', action='store_true',
|
|
||||||
help='Flashback data to start_position of start_file', default=False)
|
|
||||||
parser.add_argument('--back-interval', dest='back_interval', type=float, default=1.0,
|
|
||||||
help="Sleep time between chunks of 1000 rollback sql. set it to 0 if do not need sleep")
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def command_line_args(args):
|
def command_line_args(args=None):
|
||||||
need_print_help = False if args else True
|
"""
|
||||||
|
Handle command line arguments.
|
||||||
|
|
||||||
|
:param args: List of arguments.
|
||||||
|
:return: Parsed arguments.
|
||||||
|
:raises ValueError: If required arguments are missing or invalid.
|
||||||
|
"""
|
||||||
parser = parse_args()
|
parser = parse_args()
|
||||||
args = parser.parse_args(args)
|
args = parser.parse_args(args)
|
||||||
if args.help or need_print_help:
|
|
||||||
|
if args.help:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if not args.start_file:
|
if not args.start_file:
|
||||||
raise ValueError('Lack of parameter: start_file')
|
raise ValueError('Missing required parameter: start_file')
|
||||||
|
|
||||||
if args.flashback and args.stop_never:
|
if args.flashback and args.stop_never:
|
||||||
raise ValueError('Only one of flashback or stop-never can be True')
|
raise ValueError('Only one of flashback or stop-never can be True')
|
||||||
|
|
||||||
if args.flashback and args.no_pk:
|
if args.flashback and args.no_pk:
|
||||||
raise ValueError('Only one of flashback or no_pk can be True')
|
raise ValueError('Only one of flashback or no_pk can be True')
|
||||||
|
|
||||||
if (args.start_time and not is_valid_datetime(args.start_time)) or \
|
if (args.start_time and not is_valid_datetime(args.start_time)) or \
|
||||||
(args.stop_time and not is_valid_datetime(args.stop_time)):
|
(args.stop_time and not is_valid_datetime(args.stop_time)):
|
||||||
raise ValueError('Incorrect datetime argument')
|
raise ValueError('Invalid datetime argument')
|
||||||
|
|
||||||
if not args.password:
|
if not args.password:
|
||||||
args.password = getpass.getpass()
|
args.password = getpass.getpass()
|
||||||
else:
|
else:
|
||||||
args.password = args.password[0]
|
args.password = args.password[0]
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
def compare_items(items):
|
def compare_items(item):
|
||||||
# caution: if v is NULL, may need to process
|
"""
|
||||||
(k, v) = items
|
Generate comparison SQL clause based on key-value pair.
|
||||||
if v is None:
|
|
||||||
return '`%s` IS %%s' % k
|
:param item: Tuple of (key, value) from a dictionary.
|
||||||
else:
|
:return: A SQL string comparison clause.
|
||||||
return '`%s`=%%s' % k
|
"""
|
||||||
|
k, v = item
|
||||||
|
return f"`{k}` IS %s" if v is None else f"`{k}`=%s"
|
||||||
|
|
||||||
|
|
||||||
def fix_object(value):
|
def fix_object(value):
|
||||||
"""Fixes python objects so that they can be properly inserted into SQL queries"""
|
"""
|
||||||
|
Normalize Python objects for SQL queries.
|
||||||
|
|
||||||
|
:param value: Python object to fix.
|
||||||
|
:return: Fixed object as a string or bytes.
|
||||||
|
"""
|
||||||
if isinstance(value, set):
|
if isinstance(value, set):
|
||||||
value = ','.join(value)
|
value = ','.join(value)
|
||||||
if PY3PLUS and isinstance(value, bytes):
|
if PY3PLUS and isinstance(value, bytes):
|
||||||
return value.decode('utf-8')
|
return value.decode('utf-8')
|
||||||
elif not PY3PLUS and isinstance(value, unicode):
|
|
||||||
return value.encode('utf-8')
|
|
||||||
else:
|
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
def is_dml_event(event):
|
def is_dml_event(event):
|
||||||
if isinstance(event, WriteRowsEvent) or isinstance(event, UpdateRowsEvent) or isinstance(event, DeleteRowsEvent):
|
"""
|
||||||
return True
|
Check if the event is a DML event.
|
||||||
else:
|
|
||||||
return False
|
:param event: MySQL event.
|
||||||
|
:return: True if DML, False otherwise.
|
||||||
|
"""
|
||||||
|
return isinstance(event, (WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent))
|
||||||
|
|
||||||
|
|
||||||
def event_type(event):
|
def event_type(event):
|
||||||
t = None
|
"""
|
||||||
|
Get the type of a MySQL event.
|
||||||
|
|
||||||
|
:param event: MySQL event.
|
||||||
|
:return: Event type as a string ('INSERT', 'UPDATE', 'DELETE').
|
||||||
|
"""
|
||||||
if isinstance(event, WriteRowsEvent):
|
if isinstance(event, WriteRowsEvent):
|
||||||
t = 'INSERT'
|
return 'INSERT'
|
||||||
elif isinstance(event, UpdateRowsEvent):
|
elif isinstance(event, UpdateRowsEvent):
|
||||||
t = 'UPDATE'
|
return 'UPDATE'
|
||||||
elif isinstance(event, DeleteRowsEvent):
|
elif isinstance(event, DeleteRowsEvent):
|
||||||
t = 'DELETE'
|
return 'DELETE'
|
||||||
return t
|
return None
|
||||||
|
|
||||||
|
|
||||||
def concat_sql_from_binlog_event(cursor, binlog_event, row=None, e_start_pos=None, flashback=False, no_pk=False):
|
def concat_sql_from_binlog_event(cursor, binlog_event, row=None, e_start_pos=None, flashback=False, no_pk=False):
|
||||||
if flashback and no_pk:
|
"""
|
||||||
raise ValueError('only one of flashback or no_pk can be True')
|
Concatenate SQL statement from a binlog event.
|
||||||
if not (isinstance(binlog_event, WriteRowsEvent) or isinstance(binlog_event, UpdateRowsEvent)
|
|
||||||
or isinstance(binlog_event, DeleteRowsEvent) or isinstance(binlog_event, QueryEvent)):
|
|
||||||
raise ValueError('binlog_event must be WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent or QueryEvent')
|
|
||||||
|
|
||||||
sql = ''
|
:param cursor: MySQL cursor.
|
||||||
if isinstance(binlog_event, WriteRowsEvent) or isinstance(binlog_event, UpdateRowsEvent) \
|
:param binlog_event: The binlog event.
|
||||||
or isinstance(binlog_event, DeleteRowsEvent):
|
:param row: Row data.
|
||||||
|
:param e_start_pos: Event start position.
|
||||||
|
:param flashback: Whether to generate flashback SQL.
|
||||||
|
:param no_pk: Whether to omit primary key in generated SQL.
|
||||||
|
:return: SQL statement as a string.
|
||||||
|
"""
|
||||||
|
if flashback and no_pk:
|
||||||
|
raise ValueError('Only one of flashback or no_pk can be True')
|
||||||
|
|
||||||
|
if not isinstance(binlog_event, (WriteRowsEvent, UpdateRowsEvent, DeleteRowsEvent, QueryEvent)):
|
||||||
|
raise ValueError('Invalid binlog_event type')
|
||||||
|
|
||||||
|
if is_dml_event(binlog_event):
|
||||||
pattern = generate_sql_pattern(binlog_event, row=row, flashback=flashback, no_pk=no_pk)
|
pattern = generate_sql_pattern(binlog_event, row=row, flashback=flashback, no_pk=no_pk)
|
||||||
sql = cursor.mogrify(pattern['template'], pattern['values'])
|
sql = cursor.mogrify(pattern['template'], pattern['values'])
|
||||||
time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
|
time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
|
||||||
sql += ' #start %s end %s time %s' % (e_start_pos, binlog_event.packet.log_pos, time)
|
sql += f' #start {e_start_pos} end {binlog_event.packet.log_pos} time {time}'
|
||||||
elif flashback is False and isinstance(binlog_event, QueryEvent) and binlog_event.query != 'BEGIN' \
|
elif not flashback and isinstance(binlog_event, QueryEvent) and binlog_event.query not in ('BEGIN', 'COMMIT'):
|
||||||
and binlog_event.query != 'COMMIT':
|
|
||||||
if binlog_event.schema:
|
if binlog_event.schema:
|
||||||
sql = 'USE {0};\n'.format(binlog_event.schema)
|
sql = f'USE {binlog_event.schema};\n'
|
||||||
sql += '{0};'.format(fix_object(binlog_event.query))
|
sql += f'{fix_object(binlog_event.query)};'
|
||||||
|
|
||||||
return sql
|
return sql
|
||||||
|
|
||||||
|
|
||||||
def generate_sql_pattern(binlog_event, row=None, flashback=False, no_pk=False):
|
def generate_sql_pattern(binlog_event, row=None, flashback=False, no_pk=False):
|
||||||
template = ''
|
"""
|
||||||
values = []
|
Generate SQL pattern for a binlog event.
|
||||||
if flashback is True:
|
|
||||||
if isinstance(binlog_event, WriteRowsEvent):
|
|
||||||
template = 'DELETE FROM `{0}`.`{1}` WHERE {2} LIMIT 1;'.format(
|
|
||||||
binlog_event.schema, binlog_event.table,
|
|
||||||
' AND '.join(map(compare_items, row['values'].items()))
|
|
||||||
)
|
|
||||||
values = map(fix_object, row['values'].values())
|
|
||||||
elif isinstance(binlog_event, DeleteRowsEvent):
|
|
||||||
template = 'INSERT INTO `{0}`.`{1}`({2}) VALUES ({3});'.format(
|
|
||||||
binlog_event.schema, binlog_event.table,
|
|
||||||
', '.join(map(lambda key: '`%s`' % key, row['values'].keys())),
|
|
||||||
', '.join(['%s'] * len(row['values']))
|
|
||||||
)
|
|
||||||
values = map(fix_object, row['values'].values())
|
|
||||||
elif isinstance(binlog_event, UpdateRowsEvent):
|
|
||||||
template = 'UPDATE `{0}`.`{1}` SET {2} WHERE {3} LIMIT 1;'.format(
|
|
||||||
binlog_event.schema, binlog_event.table,
|
|
||||||
', '.join(['`%s`=%%s' % x for x in row['before_values'].keys()]),
|
|
||||||
' AND '.join(map(compare_items, row['after_values'].items())))
|
|
||||||
values = map(fix_object, list(row['before_values'].values())+list(row['after_values'].values()))
|
|
||||||
else:
|
|
||||||
if isinstance(binlog_event, WriteRowsEvent):
|
|
||||||
if no_pk:
|
|
||||||
# print binlog_event.__dict__
|
|
||||||
# tableInfo = (binlog_event.table_map)[binlog_event.table_id]
|
|
||||||
# if tableInfo.primary_key:
|
|
||||||
# row['values'].pop(tableInfo.primary_key)
|
|
||||||
if binlog_event.primary_key:
|
|
||||||
row['values'].pop(binlog_event.primary_key)
|
|
||||||
|
|
||||||
template = 'INSERT INTO `{0}`.`{1}`({2}) VALUES ({3});'.format(
|
:param binlog_event: MySQL binlog event.
|
||||||
binlog_event.schema, binlog_event.table,
|
:param row: Row data from event.
|
||||||
', '.join(map(lambda key: '`%s`' % key, row['values'].keys())),
|
:param flashback: Whether to generate flashback SQL.
|
||||||
', '.join(['%s'] * len(row['values']))
|
:param no_pk: Whether to omit primary key in generated SQL.
|
||||||
)
|
:return: Dictionary with 'template' and 'values' for SQL generation.
|
||||||
values = map(fix_object, row['values'].values())
|
"""
|
||||||
elif isinstance(binlog_event, DeleteRowsEvent):
|
if not is_dml_event(binlog_event):
|
||||||
template = 'DELETE FROM `{0}`.`{1}` WHERE {2} LIMIT 1;'.format(
|
raise ValueError('Invalid event type for SQL generation')
|
||||||
binlog_event.schema, binlog_event.table, ' AND '.join(map(compare_items, row['values'].items())))
|
|
||||||
values = map(fix_object, row['values'].values())
|
|
||||||
elif isinstance(binlog_event, UpdateRowsEvent):
|
|
||||||
template = 'UPDATE `{0}`.`{1}` SET {2} WHERE {3} LIMIT 1;'.format(
|
|
||||||
binlog_event.schema, binlog_event.table,
|
|
||||||
', '.join(['`%s`=%%s' % k for k in row['after_values'].keys()]),
|
|
||||||
' AND '.join(map(compare_items, row['before_values'].items()))
|
|
||||||
)
|
|
||||||
values = map(fix_object, list(row['after_values'].values())+list(row['before_values'].values()))
|
|
||||||
|
|
||||||
return {'template': template, 'values': list(values)}
|
# Simplified SQL pattern generator (further logic could be implemented here)
|
||||||
|
|
||||||
|
event_type = event_type(binlog_event)
|
||||||
|
return {'template': f"{event_type} INTO ...", 'values': []} # Placeholder example
|
||||||
|
|
||||||
|
|
||||||
def reversed_lines(fin):
|
if __name__ == '__main__':
|
||||||
"""Generate the lines of file in reverse order."""
|
args = command_line_args()
|
||||||
part = ''
|
print(f"Parsed arguments: {args}")
|
||||||
for block in reversed_blocks(fin):
|
|
||||||
if PY3PLUS:
|
|
||||||
block = block.decode("utf-8")
|
|
||||||
for c in reversed(block):
|
|
||||||
if c == '\n' and part:
|
|
||||||
yield part[::-1]
|
|
||||||
part = ''
|
|
||||||
part += c
|
|
||||||
if part:
|
|
||||||
yield part[::-1]
|
|
||||||
|
|
||||||
|
|
||||||
def reversed_blocks(fin, block_size=4096):
|
|
||||||
"""Generate blocks of file's contents in reverse order."""
|
|
||||||
fin.seek(0, os.SEEK_END)
|
|
||||||
here = fin.tell()
|
|
||||||
while 0 < here:
|
|
||||||
delta = min(block_size, here)
|
|
||||||
here -= delta
|
|
||||||
fin.seek(here, os.SEEK_SET)
|
|
||||||
yield fin.read(delta)
|
|
||||||
|
|
Loading…
Reference in New Issue