Last active
February 25, 2020 12:01
-
-
Save mttjohnson/ae8bb57ca12cbcf971df79e647c4936c to your computer and use it in GitHub Desktop.
MySQL DB snapshot separating tables into files for comparison purposes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
## | |
# Copyright © 2017 by Matt Johnson. All rights reserved | |
# | |
# Licensed under the Open Software License 3.0 (OSL-3.0) | |
# See included LICENSE file for full text of OSL-3.0 | |
# | |
# https://github.com/mttjohnson | |
## | |
# stop on errors | |
set -e | |
trap '>&2 echo Error: Command \`$BASH_COMMAND\` on line $LINENO failed with exit code $?' ERR | |
# identify current script directory | |
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) | |
# default variable values | |
DESTINATION_PATH="${DIR}/dbsnapshots" | |
## command line argument parsing | |
for arg in "$@"; do | |
case $arg in | |
--db-name=*) | |
DB_NAME="${arg#*=}" | |
if [[ ! "$DB_NAME" =~ ^[a-zA-Z0-9_-]+$ ]]; then | |
>&2 echo "Error: Invalid value given --db-name=$DB_NAME" | |
exit -1 | |
fi | |
;; | |
--destination-path=*) | |
DESTINATION_PATH="${arg#*=}" | |
if [[ ! "$DESTINATION_PATH" =~ ^(2\.0|develop)$ ]]; then | |
>&2 echo "Error: Invalid value given --destination-path=$DESTINATION_PATH" | |
exit -1 | |
fi | |
;; | |
--comment=*) | |
SNAPSHOT_COMMENT="${arg#*=}" | |
if [[ ! "$SNAPSHOT_COMMENT" =~ ^[a-z0-9][a-z0-9_/-]*[a-z0-9_-]+$ ]]; then | |
>&2 echo "Error: Invalid value given --comment=$SNAPSHOT_COMMENT" | |
exit -1 | |
fi | |
;; | |
--help) | |
echo "" | |
echo "Usage: $(basename $0) --db-name=<db-name> [--destination-path=<path>] [--comment=<comment>]" | |
echo "" | |
echo " --db-name=<db-name> database name to dump and take a snapshot of" | |
echo " --destination-path=<path> destnation directory path for snapshots" | |
echo " --comment=<comment> comment reference describing the snapshot" | |
echo "" | |
echo "The script will create a dbsnapshots directory and store the contents into a" | |
echo "separate timestamped directory on each execution of the script. A full backup of" | |
echo "the database will exist in the directory in addition to a tables directory. The" | |
echo "tables directory has a list of files broken out by table separate data and" | |
echo "structure files. When comparing database snapshots you will want to compare the" | |
echo "tables directories from each snapshot." | |
echo "" | |
echo "If you include a comment when executing the command there will be a" | |
echo "snapshot_comment.txt file in the main directory for each snapshot and can serve" | |
echo "as a reference for the state of the snapshot like 'before xyz' and 'after xzy'" | |
echo "" | |
echo "This script is intended to be run from a linux system, and attemting to use it" | |
echo "on MacOS will likely produce erros due to the use of csplit." | |
echo "" | |
exit -1 | |
;; | |
*) | |
>&2 echo "Error: Unrecognized argument $arg" | |
>&2 echo "Tip: If using an argument starting with \"--\", make sure to add \"=\" before the value" | |
exit -1 | |
;; | |
esac | |
done | |
## verify required command line arguments | |
if [[ ! "$DB_NAME" ]]; then | |
>&2 echo "Error: Required input --db-name missing. Please use --help for proper usage" | |
exit -1 | |
fi | |
# Get the current timestamp for the snapshot name | |
TIMESTAMP=`date +%Y-%m-%d-%H-%M-%S` | |
# load local variables | |
SNAPSHOT_PATH="${DESTINATION_PATH}/${DB_NAME}-${TIMESTAMP}" | |
DB_TABLE_PATH="${SNAPSHOT_PATH}/tables" | |
SQL_FILE="${SNAPSHOT_PATH}/${DB_NAME}-${TIMESTAMP}.sql" | |
# create the directories | |
mkdir -p ${DB_TABLE_PATH} | |
# create snapshot comment file if defined | |
[ -n "${SNAPSHOT_COMMENT}" ] && echo "${SNAPSHOT_COMMENT}" > ${SNAPSHOT_PATH}/snapshot_comment.txt | |
# excludes comments and uses extended inserts to make comparing diffs easier | |
# does not require maintaining a separate structure.sql (redundant info), but maintains each table as seperate file | |
mysqldump \ | |
--single-transaction \ | |
--triggers \ | |
--routines \ | |
--events \ | |
--compact \ | |
--add-drop-table \ | |
--extended-insert=FALSE \ | |
--default-character-set=utf8 \ | |
${DB_NAME} \ | |
| pv > ${SQL_FILE} | |
# change to table path | |
cd $DB_TABLE_PATH | |
# split dump into tables | |
csplit -s -n 4 -f table_ ${SQL_FILE} "/^DROP TABLE IF EXISTS/" '{*}' | |
# remove the 1st table file if it's empty | |
[ ! -s table_0000 ] && rm table_0000 | |
for i in table_*; do | |
# split table into structure and the data, triggers, etc. (_01 files) | |
csplit -s -f ${i}_ $i "/^INSERT INTO/" {0} || : | |
mv $i ${i}.structure | |
[ -e ${i}_00 ] && mv ${i}_00 ${i}.structure | |
done; | |
for i in table_*_01; do | |
# split the data, triggers, etc. into data and the etc files | |
csplit -s -f ${i}_ $i "/^\/\*/" {0} || : | |
mv $i ${i}.data | |
if [ -e ${i}_00 ]; then | |
mv ${i}_00 ${i}.data | |
mv ${i}_01 ${i}.etc | |
fi | |
done | |
for i in table_*.data; do | |
# sort the data | |
awk '{print $5 " " $0}' $i | cut -c 2- | sort -n | awk '{$1=""; print substr($0,2)}' >> $i.sorted | |
rm $i | |
done | |
for i in *.structure; do | |
# rename files based on their table name | |
table=$(sed -n '1 s/.*`\(.*\)`.*/\1/p' $i) | |
mv $i $table.structure || : | |
prefix=${i%.*} | |
if [ -e ${prefix}_01.data.sorted ]; then | |
mv ${prefix}_01.data.sorted $table.data.sorted | |
fi | |
if [ -e ${prefix}_01.etc ]; then | |
mv ${prefix}_01.etc $table.etc | |
fi | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment