Monday, December 13, 2010

PostgreSQL tricks

Rownum in PostgreSQL

http://stackoverflow.com/questions/3959692/rownum-in-postgresql/3959748#3959748


Reverse index
http://stackoverflow.com/questions/3927532/is-it-possible-to-index-select-domain-name-from-domains-where-x-example-com-li/3927783#3927783

Wednesday, August 18, 2010

How to remove duplicates from a table (MySQL)

CREATE TABLE table_users (
    id INT PRIMARY KEY,
    username TEXT
);

delete from table_users
USING table_users, table_users as vtable
WHERE (table_users.id > vtable.id)
AND (table_users.username=vtable.username)

The condition (table_users.id > vtable.id) means that, for each username, only the row with the smallest id survives; all later duplicates are deleted.

Thursday, July 15, 2010

Cassandra file storage

Cassandra doesn't support big blobs; there is no ready-made equivalent of GridFS (MongoDB). It is also important to keep in mind that Cassandra is written in Java and its API has no streaming, so every value read or written has to fit in memory.
http://wiki.apache.org/cassandra/FAQ#large_file_and_blob_storage

We have to split big files into multiple chunks.


Files:       (file_name => (size))
Chunks:      (chunk_id  => (data))
FilesChunks: (file_name => (chunk_id, chunk_id, ...))

Store file example in Python (lazyboy):

# -*- coding: utf-8 -*-
# <Keyspaces>
#     <Keyspace Name="BigStorage">
#        <ColumnFamily CompareWith="BytesType" Name="Files"/>
#        <ColumnFamily CompareWith="BytesType" Name="Chunks"/>
#        <ColumnFamily CompareWith="TimeUUIDType" Name="FilesChunks"/>
#     </Keyspace>
# </Keyspaces>
#

import sys
import uuid

from lazyboy import *
from lazyboy.key import Key

# Define your cluster(s)
connection.add_pool('BigStorage', ['10.10.2.29:9160'])

CHUNK_SIZE = 1024*512


class FileKey(Key):
    def __init__(self, key=None):
        Key.__init__(self, "BigStorage", "Files", key)

class File(record.Record):
    _required = ('size',)

    def __init__(self, *args, **kwargs):
        record.Record.__init__(self, *args, **kwargs)
        self.key = FileKey()

class ChunkKey(Key):
    def __init__(self, key=None):
        Key.__init__(self, "BigStorage", "Chunks", key)

class Chunk(record.Record):
    _required = ('data',)

    def __init__(self, *args, **kwargs):
        record.Record.__init__(self, *args, **kwargs)
        self.key = ChunkKey()

class FileChunkKey(Key):
    def __init__(self, key=None):
        Key.__init__(self, "BigStorage", "FilesChunks", key)

class FileChunk(record.Record):
    # Anything in here _must_ be set before the object is saved
    #_required = ('data',)

    def __init__(self, *args, **kwargs):
        \"\"\"Initialize the record, along with a new key.\"\"\"
        record.Record.__init__(self, *args, **kwargs)
        self.key = FileChunkKey()


def store_file(file_name, file_object):
    chunk_keys = []
    file_size = 0

    new_file = File()
    new_file.key = FileKey(file_name)
    new_file.update({'size': 0, 'stored': 0})
    new_file.save()
    
    while True:
        data = file_object.read(CHUNK_SIZE)
        if not data:
            break
            
        file_size += len(data)
        chunk = Chunk({'data': data})
        key =  str(uuid.uuid1())
        chunk.key = ChunkKey( key )
        chunk_keys.append(key)
        chunk.save()
        print key
    
    for chunk_key in chunk_keys:
        file_chunk = FileChunk()
        file_chunk.update( {uuid.uuid1().bytes: chunk_key} )
        file_chunk.key = FileChunkKey(file_name)
        file_chunk.save()
    
    new_file.update({'size': file_size, 'stored': 1})
    new_file.save()
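
A minimal usage sketch (the path and file name below are just examples):

with open('/data/dump.tar.gz', 'rb') as f:
    store_file('dump.tar.gz', f)

The Java snippet below (jassandra) reads the chunk keys back from FilesChunks in insertion order (TimeUUIDType columns) and reassembles the file on disk: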

package eu.iddqd.casstorage;

import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.softao.jassandra.ByteArray;
import org.softao.jassandra.ConsistencyLevel;
import org.softao.jassandra.DriverManager;
import org.softao.jassandra.IColumn;
import org.softao.jassandra.IColumnFamily;
import org.softao.jassandra.IConnection;
import org.softao.jassandra.ICriteria;
import org.softao.jassandra.IKeySpace;
import org.softao.jassandra.JassandraException;

public class CasStorage {

 /**
  * @param args
  */
 public static void main(String[] args) {
  // TODO Auto-generated method stub
  Properties info = new Properties();
  info.put(DriverManager.CONSISTENCY_LEVEL,
    ConsistencyLevel.ONE.toString());

  try {
   IConnection connection = DriverManager.getConnection(
     "thrift://127.0.0.1:9160", info);
   IKeySpace keySpace = connection.getKeySpace("BigStorage");
   IColumnFamily cfFilesChunks = keySpace.getColumnFamily("FilesChunks");
   IColumnFamily cfChunks = keySpace.getColumnFamily("Chunks");
   

   ICriteria criteria = cfFilesChunks.createCriteria();
   ICriteria chunksCriteria = cfChunks.createCriteria();
   
   // args[0] = file name (the row key in FilesChunks), args[1] = local output path
   String fileName = args[0];
   criteria.keyList(fileName).columnRange(ByteArray.EMPTY, ByteArray.EMPTY, Integer.MAX_VALUE);

   Map<String, List<IColumn>> map = criteria.select();
   List<IColumn> list = map.get(fileName);
   FileOutputStream out = new FileOutputStream(args[1]);

   for (int i = 0; i < list.size(); i++) {
    String chunkKey = new String(list.get(i).getValue().toByteArray());
    chunksCriteria.keyList(chunkKey).
     columnRange(ByteArray.EMPTY, ByteArray.EMPTY, Integer.MAX_VALUE);
    Map<String, List<IColumn>> chunkMap = chunksCriteria.select();
    out.write(chunkMap.get(chunkKey).get(0).getValue().toByteArray());
   }
   }
   
   out.close();
   
  } catch (JassandraException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  } catch (IOException ioe) {
   ioe.printStackTrace();
  }
  

 }
}


Wednesday, April 14, 2010

Which SQL query do you think is the worst for disks?

Based on milek.blogspot.com
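
The DTrace script below takes three arguments: $1 is how long to trace (the interval for the tick probe, e.g. 60sec), $2 is a write() call threshold, and $3 is the mysqld process id. Every statement that issues more than $2 write() syscalls between entering and leaving mysql_parse() is printed together with its write count.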

#!/usr/sbin/dtrace -qs

#pragma D option strsize=8192


pid$3::*mysql_parse*:entry
{
self->a=1;
self->query=copyinstr(arg1);
self->count=0;

}

pid$3::*mysql_parse*:return
/ self->a && self->count > $2 /
{
printf(\"### write() count: %d ###\\n%s\\n\\n\", self->count, self->query);

self->a=0;
self->query=0;

}

pid$3::*mysql_parse*:return
/ self->a /
{
self->a=0;
self->query=0;
}

syscall::*write*:entry
/ self->a /
{
self->count++;
}

tick-$1
{
exit(0);
} 

Wednesday, February 10, 2010

Drop schema on Oracle

-- Lock the account, disconnect its sessions, then DROP USER:
ALTER USER p$prodschema ACCOUNT LOCK;
 
BEGIN
    FOR x
        IN (SELECT 'alter system disconnect session ''' || sid || ',' || serial# || ''' immediate'
                       sqlstr
            FROM v$session
            WHERE username = 'P$PRODSCHEMA')
    LOOP
        EXECUTE IMMEDIATE x.sqlstr;
    END LOOP;
END;
 
DROP USER p$prodschema CASCADE;

Monday, February 1, 2010

MongoDB backup with ZFS

http://www.mongodb.org/display/DOCS/fsync+Command

use admin
switched to db admin
> db.runCommand({fsync:1,lock:1})
{
 "info" : "now locked against writes",
 "ok" : 1
}
> db.currentOp()
{
 "inprog" : [
 ],
 "fsyncLock" : 1
}

> // do some work here: for example, snapshot datafiles...
/sbin/zfs snapshot mongo/mongo1@backup

> db.$cmd.sys.unlock.findOne();
{ "ok" : 1, "info" : "unlock requested" }
> // unlock is now requested.  it may take a moment to take effect.
> db.currentOp()
{ "inprog" : [ ] }


[Database]
server: localhost
user: root
password: pass
port: 27017

[ZFS]
zfs_command: /usr/sbin/zfs

; ZFS filesystem containing the MongoDB database files.
db_fs_name: mongo
db_fs_dir: /mong


import ConfigParser
import sys,os,time,commands,re

from optparse import OptionParser
from os.path import exists

from pymongo.connection import Connection
from pymongo.son import SON


txt_program_revision = "0.1"
txt_program_name = "MoBack"

args_parser = OptionParser(usage="%prog --filename MoBack.cfg -d directory", version=txt_program_name + " " + txt_program_revision)
args_parser.add_option("-f", "--filename", dest="FN_CONFIG", help="Configuration file containing MongoDB and ZFS settings to use for backup run.")
args_parser.add_option("-d", "--directory", dest="DIRECTORY", help="Copy backup to directory")
args_parser.add_option("-N", "--no-destroy", dest="NODESTROY", action="store_true", help="Created snapshot will not be destroyed.", default=False)

arg_program_options, arg_leftover = args_parser.parse_args()

if arg_program_options.FN_CONFIG is None:
    print "\nMust supply a configuration filename. Please use --help for syntax."
    sys.exit(-1)

if arg_program_options.DIRECTORY is None:
    print "\nMust supply a directory where the snapshot will be sent. Please use --help for syntax."
    sys.exit(-1)

### Load in configuration settings
config_parser = ConfigParser.SafeConfigParser()
try:
    f_config = open(arg_program_options.FN_CONFIG)
except IOError, e:
    print \"\\nCannot find \" + arg_program_options.FN_CONFIG + \". Please check the argument and try again.\"
    sys.exit(-2)

backup_dir = None
if not exists( arg_program_options.DIRECTORY):
    print \"\\nDirectory \" + arg_program_options.FN_CONFIG + \" don\'t exist. Please check the argument and try again.\"
    sys.exit(-2)
else:
    backup_dir = arg_program_options.DIRECTORY


config_parser.readfp(f_config)

# Validate individual required options
try:
    database_server = config_parser.get("Database", "server")
    database_user   = config_parser.get("Database", "user")
    database_pass   = config_parser.get("Database", "password")
    database_port   = config_parser.get("Database", "port")

    fn_zfs_cmd       = config_parser.get("ZFS", "zfs_command")
    fn_zfs_db_fs     = config_parser.get("ZFS", "db_fs_name")
    fn_zfs_db_fs_dir = config_parser.get("ZFS", "db_fs_dir")
except ConfigParser.NoOptionError, e:
    print "\n" + repr(e)
    print "Please make sure all required sections and options are supplied and try again."
    f_config.close()
    sys.exit(-3)

zfs_list_status, zfs_list_output = commands.getstatusoutput(fn_zfs_cmd + " list")
if not re.compile(fn_zfs_db_fs).search(zfs_list_output, 1):
    print "\nNo ZFS DB filesystem named " + fn_zfs_db_fs + " exists. Please check your configuration and try again."
    sys.exit(-4)
print "\nAll specified ZFS filesystems are present."


print \"\\n\\nMoBack is connecting to MongoDB:\"

try:
    connection = Connection( database_server , int(database_port))
    db = connection['admin']
    db.authenticate(database_user, database_pass)
except:
    print \"Unable to connect to MySQL server \" + database_server + \". Please check the configuration file and try again.\"
    sys.exit(-5)

print \"\\tConnected to \" + database_server + \".\"

time_snapshot = 'auto-' + time.strftime("%Y-%m-%d_%H%M%S")

print \"\\tPrepared MongoDB to ensure  consistency.\"
loc_cmd = SON()
loc_cmd[\'fsync\'] = 1
loc_cmd[\'lock\'] = 1
res = db.command(loc_cmd)

### Pull ZFS snapshots.
try:
    # Snapshot DB
    snap_name = fn_zfs_db_fs + "@" + time_snapshot
    print "\tSnapping ZFS DB filesystem."
    zfs_dbsnap_status, zfs_dbsnap_output = commands.getstatusoutput(fn_zfs_cmd + " snapshot " + snap_name)
    if zfs_dbsnap_status != 0:
        print "An error occurred while executing the ZFS snapshot on " + fn_zfs_db_fs + ". Unlocking collections and quitting."
        coll = db['$cmd.sys.unlock']
        coll.find_one()
        sys.exit(-7)
except:
    coll = db['$cmd.sys.unlock']
    coll.find_one()
    print "\tAn unrecoverable error occurred while trying to pull the snapshots. We have unlocked the collections. Please check your system logs for details as to the ZFS error."
    sys.exit(-7)

print \"\\tUnlocking collections.\"
coll = db[\'$cmd.sys.unlock\']
coll.findOne()
print \"\\tcollections unlocked.\"
print \"MoBack snapshot run completed.\"

print \"\\n\\nSnapshots created:\"
print \"\\t\" + fn_zfs_db_fs + \"@\" + time_snapshot
sys.exit( 0)


if backup_dir:
    zfs_backsend_status, zfs_backsend_output = \
        commands.getstatusoutput(fn_zfs_cmd + " send " + snap_name + " > " + backup_dir + "/" + database_server + "_" + database_port + "_" + time_snapshot)

    if zfs_backsend_status != 0:
        print "\nAn error occurred while executing the ZFS send on " + fn_zfs_db_fs + "."
        print zfs_backsend_output
        sys.exit(-7)


if arg_program_options.NODESTROY != True:
    snap_re = re.compile(r'@auto-')
    if not snap_re.search(snap_name):
        print "\n\nBad snapshot name."
        sys.exit(-1)

    print "\n\nScript is going to destroy snapshot " + snap_name + "."
    zfs_dbsdestroy_status, zfs_dbsdestroy_output = commands.getstatusoutput(fn_zfs_cmd + " destroy " + snap_name)
    if zfs_dbsdestroy_status != 0:
        print "\nAn error occurred while executing the ZFS destroy on " + snap_name + "."
        print zfs_dbsdestroy_output
        sys.exit(-7)
    print "\n" + snap_name + " destroyed."

ZFS has many problems with fragmentation.

Wednesday, January 20, 2010

MongoDB huge collections

I have to load 1 000 000 000 records into MongoDB. It is not a good idea to create the indexes before this operation: after the first 40 000 000 records the insert speed decreased from 10k/s to 1k/s, so it is better to build the indexes after the bulk load.
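
A minimal sketch of the load order that works better, using the same old pymongo API as the backup script above (host, database, collection and field names are only placeholders):

# -*- coding: utf-8 -*-
# Bulk-load first, build the index afterwards.
from pymongo.connection import Connection

connection = Connection('localhost', 27017)
collection = connection['bigdb']['records']

BATCH_SIZE = 10000
batch = []
for i in xrange(1000000000):
    batch.append({'n': i, 'payload': 'x' * 32})
    if len(batch) == BATCH_SIZE:
        collection.insert(batch)      # bulk insert while no indexes exist
        batch = []
if batch:
    collection.insert(batch)

# Create the index only after the whole data set is loaded.
collection.ensure_index('n')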