Monday, February 1, 2010

mongodb backup with zfs

http://www.mongodb.org/display/DOCS/fsync+Command

use admin
switched to db admin
> db.runCommand({fsync:1,lock:1})
{
 \"info\" : \"now locked against writes\",
 \"ok\" : 1
}
> db.currentOp()
{
 \"inprog\" : [
 ],
 \"fsyncLock\" : 1
}

> // do some work here: for example, snapshot datafiles...
/sbin/zfs/snapshot /mongo/mongo1@backup

> db.$cmd.sys.unlock.findOne();
{ \"ok\" : 1, \"info\" : \"unlock requested\" }
> // unlock is now requested.  it may take a moment to take effect.
> db.currentOp()
{ \"inprog\" : [ ] }


[Database]
server: localhost
user: root
password: pass
port: 27017

[ZFS]
zfs_command: /usr/sbin/zfs

; ZFS filesystem containing the MongoDB database files.
db_fs_name: mongo
db_fs_dir: /mong


import ConfigParser
import sys,os,time,commands,re

from optparse import OptionParser
from os.path import exists

from pymongo.connection import Connection
from pymongo.son import SON


txt_program_revision = \"0.1\"
txt_program_name = \"MoBack\"

args_parser = OptionParser(usage=\"%prog --filename MoBack.cfg -d directory\", version=txt_program_name + \" \" + txt_program_revision)
args_parser.add_option(\"-f\", \"--filename\", dest=\"FN_CONFIG\",help=\"Configuration file containing MONGODB and ZFS settings to use for backup run.\")
args_parser.add_option(\"-d\", \"--directory\", dest=\"DIRECTORY\",help=\"Copy backup to directory\")
args_parser.add_option(\"-N\", \"--no-destroy\", dest=\"NODESTROY\",action=\"store_true\", help=\"Ceated snapshot will be not destroyed.\",default=False )

arg_program_options, arg_leftover = args_parser.parse_args()

if arg_program_options.FN_CONFIG==None:
    print \"\\nMust supply a configuration filename. Please use --help for syntax.\"
    sys.exit(-1)

if arg_program_options.DIRECTORY==None:
    print \"\\nMust supply a directory where snapshot will send. Please use --help for syntax.\"
    sys.exit(-1)

### Load in configuration settings
config_parser = ConfigParser.SafeConfigParser()
try:
    f_config = open(arg_program_options.FN_CONFIG)
except IOError, e:
    print \"\\nCannot find \" + arg_program_options.FN_CONFIG + \". Please check the argument and try again.\"
    sys.exit(-2)

backup_dir = None
if not exists( arg_program_options.DIRECTORY):
    print \"\\nDirectory \" + arg_program_options.FN_CONFIG + \" don\'t exist. Please check the argument and try again.\"
    sys.exit(-2)
else:
    backup_dir = arg_program_options.DIRECTORY


config_parser.readfp(f_config)

# Validate individual required options
try:
    database_server= config_parser.get(\"Database\",\"server\")
    database_user  = config_parser.get(\"Database\",\"user\")
    database_pass  = config_parser.get(\"Database\",\"password\")
    database_port  = config_parser.get(\"Database\",\"port\")

    fn_zfs_cmd = config_parser.get(\"ZFS\",\"zfs_command\")
    fn_zfs_db_fs = config_parser.get(\"ZFS\",\"db_fs_name\")
    fn_zfs_db_fs_dir  = config_parser.get(\"ZFS\",\"db_fs_dir\")
except ConfigParser.NoOptionError, e:
    print \"\\n\" + repr(e)
    print \"Please make sure all required sections and options are supplied and try again.\"
    f_config.close()
    sys.exit(-3)

zfs_list_status, zfs_list_output  = commands.getstatusoutput(fn_zfs_cmd + \" list\")
if not re.compile(fn_zfs_db_fs).search(zfs_list_output, 1):
    print \"\\nNo ZFS DB filesystem named \" + fn_zfs_db_fs + \" exists. Please check your configuration and try again.\"
    sys.exit(-4) 
print \"\\nAll specified ZFS filesystems are present.\"


print \"\\n\\nMoBack is connecting to MongoDB:\"

try:
    connection = Connection( database_server , int(database_port))
    db = connection[\'admin\']
    db.authenticate(database_user, database_pass)
except:
    print \"Unable to connect to MySQL server \" + database_server + \". Please check the configuration file and try again.\"
    sys.exit(-5)

print \"\\tConnected to \" + database_server + \".\"

time_snapshot = \'auto-\'+ time.strftime(\"%Y-%m-%d_%H%M%S\")

print \"\\tPrepared MongoDB to ensure  consistency.\"
loc_cmd = SON()
loc_cmd[\'fsync\'] = 1
loc_cmd[\'lock\'] = 1
res = db.command(loc_cmd)

### Pull ZFS snapshots.
try:
    # Snapshot DB
    snap_name = fn_zfs_db_fs + \"@\" + time_snapshot
    print \"\\tSnapping ZFS DB filesystem.\"
    zfs_dbsnap_status, zfs_dbsnap_output = commands.getstatusoutput(fn_zfs_cmd + \" snapshot \" + snap_name)
    if zfs_dbsnap_status != 0:
        print \"An error occurred while executing the ZFS snapshot on \" + _fn_zfs_db_fs + \". Unlocking collections and quitting.\"
        coll = db[\'$cmd.sys.unlock\']
        coll.findOne()
        sys.exit(-7)
except:
    coll = db[\'$cmd.sys.unlock\']
    coll.findOne()
    print \"\\tAn unrecoverable error occurred while trying to pull the snapshots. We have unlocked the collections. Please check your system logs for details as to the ZFS error.\"
    sys.exit(-7)

print \"\\tUnlocking collections.\"
coll = db[\'$cmd.sys.unlock\']
coll.findOne()
print \"\\tcollections unlocked.\"
print \"MoBack snapshot run completed.\"

print \"\\n\\nSnapshots created:\"
print \"\\t\" + fn_zfs_db_fs + \"@\" + time_snapshot
sys.exit( 0)


if backup_dir:
    zfs_backsend_status, zfs_backsend_output = \\
        commands.getstatusoutput(fn_zfs_cmd + \" send \" + snap_name + \" > \"  + backup_dir +\"/\"+  _database_server+\"_\"+database_port+\"_\"+time_snapshot)

if zfs_backsend_status != 0:
    print \"\\nAn error occurred while executing the ZFS send on \" + fn_zfs_db_fs + \".\"
    print zfs_backsend_output
    sys.exit(-7)


if arg_program_options.NODESTROY != True:
    snap_re = re.compile(r\'@auto\\-\')
    if not snap_re.search(snap_name):
        print \"\\n\\nBad snapshot name.\"
        sys.exit(-1)

    print \"\\n\\nScript is going to destroiy snapshot \"+ snap_name +\".\"
    zfs_dbsdestroy_status, zfs_dbsdestroy_output = commands.getstatusoutput(fn_zfs_cmd + \" destroy \" + snap_name)
    if zfs_dbsdestroy_status != 0:
        print \"\\nAn error occurred while executing the ZFS destroy on \" +  snap_name + \".\"
        print zfs_dbsdestroy_output
        sys.exit(-7)
    print \"\\n\"+ snap_name +\" destroyed.\" 

ZFS has many problems with fragmentation.

2 comments:

  1. Do you think it would be good to host MongoDB on a compressed ZFS filesystem? There was an article today on the Sun blog on how compressed ZFS also increases performance.

    Why do you say ZFS has many problems with defragmentation?

    ReplyDelete
  2. Becouse company where I'm working now has many problem with it. We have over two petabytes of data on ZFS.
    With ZFS, data is always written to a new block on disk before changing the pointers to the data and committing the write.
    After many updates,deletes space block map is to big for memory and write speed is #@!$!#$#@.


    ZFS is'nt the best choice for mongodb, becouse mongo prealocate data files.Compression increse performance, mongodb works 10%-20% faster on compressed zfs filesystem. I recommended primary databas on UFS and slave on zfs (for backups).

    ReplyDelete