Skip to content

Multivec

Data preparation

Creating a sample multivec file with row infos

import h5py
import numpy as np

# Location of the throwaway input file; the conversion step below reads it back.
filepath = '/tmp/blah.h5'

# Write a single gzip-compressed dataset named 'chr1' holding 10000 rows by
# 5 columns of uniform random values. The context manager guarantees the file
# is flushed and closed even if dataset creation or the write raises.
with h5py.File(filepath, 'w') as f:
    d = f.create_dataset('chr1', (10000, 5), compression='gzip')
    d[:] = np.random.random((10000, 5))

from clodius.multivec import create_multivec_multires
from os.path import expanduser

# Destination of the multi-resolution file, resolved against the user's home.
output_file = expanduser('~/Downloads/blah.multires.h5')

# Open the input read-only inside a context manager so its handle is released
# once the multires file has been built, even if the conversion raises.
with h5py.File(filepath, "r") as f_in:
    f_out = create_multivec_multires(
        f_in,
        chromsizes=[('chr1', 10000)],
        # Aggregation: sum each pair of adjacent rows (axis 0), ignoring NaNs,
        # turning an (n, cols) array into (n / 2, cols). The reshape requires
        # n to be even.
        agg=lambda x: np.nansum(x.T.reshape((x.shape[1], -1, 2)), axis=2).T,
        starting_resolution=1,
        tile_size=1024,
        output_file=output_file,
        row_infos=['A', 'C', 'G', 'T', 'N'],
    )

# The call hands back an open file object for the output; close it explicitly.
f_out.close()