Source code for pyImagingMSpec.convert.imzml

__author__ = 'palmer'
import argparse
import numpy as np

[docs]def centroid_imzml(input_filename, output_filename, step=[], apodization=False, w_size=10, min_intensity=1e-5, prevent_duplicate_pixels=False): # write a file to imzml format (centroided) """ :type input_filename string - source file path (must be .imzml) :type output_filename string - output file path (must be .imzml) :type step tuple grid spacing of pixels (if [] the script will try and guess it) :type apodization boolean whether to try and remove FT wiglet artefacts :type w_size window side (m/z bins) for apodization :type min_intensity: float minimum intensity peaks to return during centroiding :type prevent_duplicate_pixels bool if True will only return the first spectrum for pixels with the same coodinates """ from pyimzml.ImzMLParser import ImzMLParser from pyimzml.ImzMLWriter import ImzMLWriter from pyMSpec.centroid_detection import gradient imzml_in = ImzMLParser(input_filename) precisionDict = {'f':("32-bit float", np.float32), 'd': ("64-bit float", np.float64), 'i': ("32-bit integer", np.int32), 'l': ("64-bit integer", np.int64)} mz_dtype = precisionDict[imzml_in.mzPrecision][1] int_dtype = precisionDict[imzml_in.intensityPrecision][1] # Convert coords to index -> kinda hacky coords = np.asarray(imzml_in.coordinates).round(5) coords -= np.amin(coords, axis=0) if step == []: # have a guesss step = np.array([np.median(np.diff(np.unique(coords[:, i]))) for i in range(coords.shape[1])]) step[np.isnan(step)] = 1 print 'estimated pixel size: {} x {}'.format(step[0], step[1]) coords = coords / np.reshape(step, (3,)).T coords = coords.round().astype(int) ncol, nrow, _ = np.amax(coords, axis=0) + 1 print 'new image size: {} x {}'.format(nrow, ncol) if prevent_duplicate_pixels: b = np.ascontiguousarray(coords).view(np.dtype((np.void, coords.dtype.itemsize * coords.shape[1]))) _, coord_idx = np.unique(b, return_index=True) print np.shape(imzml_in.coordinates), np.shape(coord_idx) print "original number of spectra: {}".format(len(coords)) else: coord_idx = range(len(coords)) n_total = len(coord_idx) print 'spectra to write: {}'.format(n_total) with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml_out: done = 0 for key in range(np.shape(imzml_in.coordinates)[0]): print key if all((prevent_duplicate_pixels, key not in coord_idx)): # skip duplicate pixels continue mzs, intensities = imzml_in.getspectrum(key) if apodization: from pyMSpec import smoothing # todo - add to processing list in imzml mzs, intensities = smoothing.apodization(mzs, intensities, {'w_size':w_size}) mzs_c, intensities_c, _ = gradient(mzs, intensities, min_intensity=min_intensity) pos = coords[key] if len(pos)==2: pos.append(0) pos = (pos[0], nrow - 1 - pos[1], pos[2]) imzml_out.addSpectrum(mzs_c, intensities_c, pos) done += 1 if done % 1000 == 0: print "[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total) print "finished!"
if __name__ == '__main__': parser = argparse.ArgumentParser(description="convert centroids from profile imzml to centroided imzML") parser.add_argument('input', type=str, help="input imzml file") parser.add_argument('output', type=str, help="output filename (centroided imzml)") args = parser.parse_args() centroid_imzml(args.input, args.output)