http://wiki.apache.org/cassandra/FAQ#large_file_and_blob_storage
We have to split big files into multiple chunks.
Example of storing a file in Python (using lazyboy).
# -*- coding: utf-8 -*-
# <Keyspaces>
#   <Keyspace Name="BigStorage">
#     <ColumnFamily CompareWith="BytesType" Name="Files"/>
#     <ColumnFamily CompareWith="BytesType" Name="Chunks"/>
#     <ColumnFamily CompareWith="TimeUUIDType" Name="FilesChunks"/>
#   </Keyspace>
# </Keyspaces>
#
import sys
import uuid

from lazyboy import *
from lazyboy.key import Key

# Register the Cassandra cluster pool used by the "BigStorage" keyspace.
connection.add_pool('BigStorage', ['10.10.2.29:9160'])

# Files are split into pieces of this many bytes (512 KiB) before storage.
CHUNK_SIZE = 1024 * 512
class FileKey(Key):
    """Row key for the "Files" column family in the "BigStorage" keyspace."""

    def __init__(self, key=None):
        # key=None lets lazyboy generate a row key when none is supplied.
        Key.__init__(self, "BigStorage", "Files", key)
class File(record.Record):
    """Per-file metadata record (its total size and a 'stored' flag)."""

    # 'size' must be set before the record is saved.
    _required = ('size',)

    def __init__(self, *args, **kwargs):
        record.Record.__init__(self, *args, **kwargs)
        # Default key; callers typically replace it with FileKey(file_name).
        self.key = FileKey()
class ChunkKey(Key):
    """Row key for the "Chunks" column family in the "BigStorage" keyspace."""

    def __init__(self, key=None):
        # key=None lets lazyboy generate a row key when none is supplied.
        Key.__init__(self, "BigStorage", "Chunks", key)
class Chunk(record.Record):
    """Record holding one piece (up to CHUNK_SIZE bytes) of a stored file."""

    # 'data' must be set before the record is saved.
    _required = ('data',)

    def __init__(self, *args, **kwargs):
        record.Record.__init__(self, *args, **kwargs)
        # Default key; callers typically replace it with ChunkKey(uuid).
        self.key = ChunkKey()
class FileChunkKey(Key):
    """Row key for the "FilesChunks" index column family."""

    def __init__(self, key=None):
        # key=None lets lazyboy generate a row key when none is supplied.
        Key.__init__(self, "BigStorage", "FilesChunks", key)
class FileChunk(record.Record):
    """Index record mapping TimeUUID column names to chunk row keys.

    One row per file; the TimeUUIDType comparator on "FilesChunks" keeps
    the columns — and therefore the chunks — in insertion order.
    """

    # Anything listed here _must_ be set before the object is saved.
    #_required = ('data',)

    def __init__(self, *args, **kwargs):
        """Initialize the record, along with a new key."""
        record.Record.__init__(self, *args, **kwargs)
        self.key = FileChunkKey()
def store_file(file_name, file_object):
    """Store file_object's contents under file_name, split into chunks.

    Writes one "Files" record (size plus a 'stored' completion flag), one
    "Chunks" row per CHUNK_SIZE piece, and a "FilesChunks" index row whose
    TimeUUID column names preserve the chunk order.
    """
    chunk_keys = []
    file_size = 0

    # Create the file record up front, marked incomplete (stored=0) until
    # every chunk and the index row have been written.
    new_file = File()
    new_file.key = FileKey(file_name)
    new_file.update({'size': 0, 'stored': 0})
    new_file.save()

    while True:
        data = file_object.read(CHUNK_SIZE)
        if not data:
            break
        file_size += len(data)
        chunk = Chunk({'data': data})
        key = str(uuid.uuid1())
        chunk.key = ChunkKey(key)
        chunk_keys.append(key)
        chunk.save()
        print(key)  # progress/debug output, one line per stored chunk

    # Index the chunk keys under the file name. uuid1() column names are
    # time-ordered, so the TimeUUIDType comparator preserves chunk order.
    for chunk_key in chunk_keys:
        file_chunk = FileChunk()
        file_chunk.update({uuid.uuid1().bytes: chunk_key})
        file_chunk.key = FileChunkKey(file_name)
        file_chunk.save()

    # All chunks are in place: record the real size and flip the flag.
    new_file.update({'size': file_size, 'stored': 1})
    new_file.save()
package eu.iddqd.casstorage;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.softao.jassandra.ByteArray;
import org.softao.jassandra.ConsistencyLevel;
import org.softao.jassandra.DriverManager;
import org.softao.jassandra.IColumn;
import org.softao.jassandra.IColumnFamily;
import org.softao.jassandra.IConnection;
import org.softao.jassandra.ICriteria;
import org.softao.jassandra.IKeySpace;
import org.softao.jassandra.JassandraException;
public class CasStorage {
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
Properties info = new Properties();
info.put(DriverManager.CONSISTENCY_LEVEL,
ConsistencyLevel.ONE.toString());
try {
IConnection connection = DriverManager.getConnection(
\\"thrift://127.0.0.1:9160\\", info);
IKeySpace keySpace = connection.getKeySpace(\\"BigStorage\\");
IColumnFamily cfFilesChunks = keySpace.getColumnFamily(\\"FilesChunks\\");
IColumnFamily cfChunks = keySpace.getColumnFamily(\\"Chunks\\");
ICriteria criteria = cfFilesChunks.createCriteria();
ICriteria chunksCriteria = cfChunks.createCriteria();
String fileName = args[1];
criteria.keyList(fileName).columnRange(ByteArray.EMPTY, ByteArray.EMPTY, Integer.MAX_VALUE);
Map<String, List<IColumn>> map = criteria.select();
List<IColumn> list = map.get(fileName);
FileOutputStream out = new FileOutputStream(args[2]);
for (int i=0; i<list.size(); i++){
String chunkKey = new String(list.get(i).getValue().toByteArray());
chunksCriteria.keyList(chunkKey).
columnRange(ByteArray.EMPTY, ByteArray.EMPTY, Integer.MAX_VALUE);
Map<String, List<IColumn>> chunkMap = chunksCriteria.select();
out.write(chunkMap.get(chunkKey).get(0).getValue().toByteArray());
}
out.close();
} catch (JassandraException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
}