diff --git a/.gitignore b/.gitignore index e8503d2..20e0d1d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,4 @@ /site #results of example run -example/test_example* -example/test_results/* +example/_results/example* diff --git a/bin/nucleoatac b/bin/nucleoatac old mode 100644 new mode 100755 index b0e08df..9d52284 --- a/bin/nucleoatac +++ b/bin/nucleoatac @@ -18,20 +18,22 @@ import sys from nucleoatac import __version__ import time import matplotlib as mpl -mpl.use('PS') +#mpl.use('PS') +from multiprocessing import freeze_support from nucleoatac.cli import nucleoatac_parser, nucleoatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "nucleoatac version " + __version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + freeze_support() + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("nucleoatac version " + __version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = nucleoatac_parser() args = parser.parse_args() nucleoatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted nucleoatac.") sys.exit(0) diff --git a/bin/pyatac b/bin/pyatac index cc3dbb4..e9b5370 --- a/bin/pyatac +++ b/bin/pyatac @@ -16,21 +16,23 @@ of the MIT License (see the file COPYING included with the distribution). import sys import matplotlib as mpl -mpl.use('PS') +#mpl.use('PS') +from multiprocessing import freeze_support from pyatac import __version__ import time from pyatac.cli import pyatac_parser, pyatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "pyatac version "+__version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + freeze_support() + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("pyatac version "+__version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = pyatac_parser() args = parser.parse_args() pyatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted pyatac.") sys.exit(0) diff --git a/example/test_results/README.txt b/example/_results/README.txt similarity index 100% rename from example/test_results/README.txt rename to example/_results/README.txt diff --git a/nucleoatac/NFRCalling.py b/nucleoatac/NFRCalling.py index d41fd4c..a0be397 100644 --- a/nucleoatac/NFRCalling.py +++ b/nucleoatac/NFRCalling.py @@ -89,7 +89,7 @@ def findNFRs(self): if self.chrom in tbx.contigs: for row in tbx.fetch(self.chrom, self.start, self.end, parser = pysam.asTuple()): nucs.append(int(row[1])) - for j in xrange(1,len(nucs)): + for j in range(1,len(nucs)): left = nucs[j-1] + 73 right = nucs[j] - 72 if right <= left: @@ -106,7 +106,7 @@ def process(self, params): self.findNFRs() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index d146d73..1919fc2 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -61,9 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov): self.bias_mat.start + offset, self.bias_mat.end - offset), vmat.mat,mode = 'valid')[0] - self.vals = self.vals * self.nuc_cov/ self.cov.vals - - + self.vals = self.vals * self.nuc_cov / self.cov.vals class SignalDistribution:
"""Class for determining distribution of signal""" @@ -79,7 +77,7 @@ def simulateReads(self): sim_mat = np.reshape(sim_vect, self.vmat.mat.shape) return sim_mat def simulateDist(self, numiters = 1000): - self.scores = map(lambda x: np.sum(self.simulateReads() * self.vmat.mat),range(numiters)) + self.scores = [np.sum(self.simulateReads() * self.vmat.mat) for x in range(numiters)] def analStd(self): flatv = np.ravel(self.vmat.mat) var = calculateCov(self.probs, flatv, self.reads) @@ -91,8 +89,7 @@ def analMean(self): def norm(x, v, w, mean): """compute values of normal pdf with given mean and sd at values in x""" - norm = (1.0/(np.sqrt(2*np.pi*v)) * - np.exp(-(x - mean)**2/(2*v))) + norm = (1.0/(np.sqrt(2*np.pi*v)) * np.exp(-(x - mean)**2/(2*v))) norm = norm * (w/max(norm)) return norm @@ -139,14 +136,14 @@ def addNorms(x,params): """Add several normal distributions together""" l = len(x) fit = np.zeros(l) - i = len(params)/3 + i = len(params)//3 for j in range(i): fit += norm(x,params[j*3],params[3*j+1],params[3*j+2]) return fit def err_func(pars,y): """error function for normal fit; to be used for fitNorm""" x = np.linspace(0,len(y)-1,len(y)) - return sum((addNorms(x, pars) - y)**2) + return np.sum((addNorms(x, pars) - y)**2) def fitNorm(guess, bound, sig): """Fit a normal to the signal with lower and upperbounds to sd""" a = (sig,) @@ -156,21 +153,21 @@ def fitNorm(guess, bound, sig): allnucs = nuctrack.sorted_nuc_keys x = bisect_left(allnucs,index) if x == 0: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) elif index - allnucs[x-1] < nuctrack.params.nonredundant_sep: left = allnucs[x-1] means = (index - allnucs[x-1],0) else: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) if x == len(allnucs)-1: - right = index + nuctrack.params.nonredundant_sep/3 + 1 + right = index + nuctrack.params.nonredundant_sep//3 + 1 elif allnucs[x+1] - index < nuctrack.params.nonredundant_sep: right = allnucs[x+1] means += (allnucs[x+1] - left,) else: - right = index + nuctrack.params.nonredundant_sep/3 +1 + right = index + nuctrack.params.nonredundant_sep//3 +1 sig = nuctrack.smoothed.vals[left:right] sig[sig<0] = 0 if len(means)==1: @@ -237,14 +234,14 @@ def __init__(self, chunk): def initialize(self, parameters): self.params = parameters def getFragmentMat(self): - self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper/2+1), - self.end + max(self.params.window,self.params.upper/2+1), 0, self.params.upper, atac = self.params.atac) + self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper//2+1), + self.end + max(self.params.window,self.params.upper//2+1), 0, self.params.upper, atac = self.params.atac) self.mat.makeFragmentMat(self.params.bam) def makeBiasMat(self): self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window, self.end + self.params.window, 0, self.params.upper) - bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2, - self.end + self.params.window + self.params.upper/2 + 1, log = True) + bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2, + self.end + self.params.window + self.params.upper//2 + 1, log = True) if 
self.params.fasta is not None: bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm) self.bias_mat.makeBiasMat(bias_track) @@ -298,7 +295,7 @@ def findAllNucs(self): #find peaks in normalized sigal cands1 = call_peaks(combined, min_signal = 0, sep = self.params.redundant_sep, - boundary = self.params.nonredundant_sep/2, order = self.params.redundant_sep/2) + boundary = self.params.nonredundant_sep//2, order = self.params.redundant_sep//2) for i in cands1: nuc = Nucleosome(i + self.start, self) if nuc.nuc_cov > self.params.min_reads: @@ -310,7 +307,7 @@ def findAllNucs(self): self.nuc_collection[i] = nuc self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys())) self.nonredundant = reduce_peaks( self.sorted_nuc_keys, - map(lambda x: self.nuc_collection[x].z, self.sorted_nuc_keys), + [self.nuc_collection[x].z for x in self.sorted_nuc_keys], self.params.nonredundant_sep) self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant) def fit(self): @@ -340,7 +337,7 @@ def process(self, params): self.makeInsertionTrack() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/Occupancy.py b/nucleoatac/Occupancy.py index 9e9cd59..c1d3707 100644 --- a/nucleoatac/Occupancy.py +++ b/nucleoatac/Occupancy.py @@ -8,7 +8,6 @@ from scipy import signal, optimize, stats import numpy as np import matplotlib.pyplot as plt -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragmentsizes import FragmentSizes from pyatac.tracks import Track, CoverageTrack from pyatac.chunk import Chunk @@ -23,9 +22,11 @@ class FragmentMixDistribution: def __init__(self, lower = 0, upper =2000): self.lower = lower self.upper = upper + def getFragmentSizes(self, bamfile, chunklist = None): self.fragmentsizes = FragmentSizes(self.lower, self.upper) self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist) + def modelNFR(self, boundaries = (35,115)): """Model NFR distribution with gamma distribution""" b = np.where(self.fragmentsizes.get(self.lower,boundaries[1]) == max(self.fragmentsizes.get(self.lower,boundaries[1])))[0][0] + self.lower @@ -64,14 +65,15 @@ def gamma_fit(X,o,p): self.nfr_fit.get(boundaries[1],self.upper))) nuc[nuc<=0]=min(min(nfr)*0.1,min(nuc[nuc>0])*0.001) self.nuc_fit = FragmentSizes(self.lower, self.upper, vals = nuc) + def plotFits(self,filename=None): """plot the Fits""" fig = plt.figure() - plt.plot(range(self.lower,self.upper),self.fragmentsizes.get(), + plt.plot(list(range(self.lower,self.upper)),self.fragmentsizes.get(), label = "Observed") - plt.plot(range(self.lower,self.upper),self.nfr_fit0.get(), label = "NFR Fit") - plt.plot(range(self.lower,self.upper),self.nuc_fit.get(), label = "Nucleosome Model") - plt.plot(range(self.lower,self.upper),self.nfr_fit.get(), label = "NFR Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit0.get(), label = "NFR Fit") + plt.plot(list(range(self.lower,self.upper)),self.nuc_fit.get(), label = "Nucleosome Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit.get(), label = "NFR Model") plt.legend() plt.xlabel("Fragment size") plt.ylabel("Relative Frequency") @@ -109,8 +111,8 @@ def calculateOccupancy(inserts, bias, params): nuc_probs = nuc_probs / np.sum(nuc_probs) nfr_probs = params.nfr_probs * bias nfr_probs = nfr_probs / np.sum(nfr_probs) - x = map(lambda alpha: np.log(alpha * nuc_probs + (1 - alpha) * 
nfr_probs), params.alphas) - logliks = np.array(map(lambda j: np.sum(x[j]*inserts),range(params.l))) + x = [np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs) for alpha in params.alphas] + logliks = np.array([np.sum(x[j]*inserts) for j in range(params.l)]) logliks[np.isnan(logliks)] = -float('inf') occ = params.alphas[np.argmax(logliks)] #Compute upper and lower bounds for 95% confidence interval @@ -129,11 +131,11 @@ def calculateOccupancyMLE(self, mat, bias_mat, params): """Calculate Occupancy track""" offset=self.start - mat.start if offset self.params.min_occ and tmp.reads > 0: self.peaks[peak] = tmp + def getNucDist(self): """Get nucleosomal insert distribution""" nuc_dist = np.zeros(self.params.upper) - for peak in self.peaks.keys(): + for peak in list(self.peaks.keys()): sub = self.mat.get(start = self.peaks[peak].start-self.params.flank, end = self.peaks[peak].start+1+self.params.flank) sub_sum = np.sum(sub,axis=1) sub_sum = sub_sum / float(sum(sub_sum)) nuc_dist += sub_sum return(nuc_dist) + def process(self, params): """proces chunk -- calculat occupancy, get coverage, call peaks""" self.params = params @@ -246,9 +255,10 @@ def process(self, params): self.calculateOcc() self.getCov() self.callPeaks() + def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self, name) diff --git a/nucleoatac/__init__.py b/nucleoatac/__init__.py index 8cde705..abeeedb 100644 --- a/nucleoatac/__init__.py +++ b/nucleoatac/__init__.py @@ -1,3 +1 @@ -#Define version based on setup script -import pkg_resources -__version__ = pkg_resources.require("NucleoATAC")[0].version +__version__ = '0.4.0' diff --git a/nucleoatac/cli.py b/nucleoatac/cli.py index 459e85d..736e648 100644 --- a/nucleoatac/cli.py +++ b/nucleoatac/cli.py @@ -32,9 +32,9 @@ def nucleoatac_main(args): print('---------Calling NFR positions--------------------------------------------------') run_nfr(args) elif call == "run": - occ_args = parser.parse_args(map(str,['occ','--bed',args.bed,'--bam',args.bam, + occ_args = parser.parse_args(list(map(str,['occ','--bed',args.bed,'--bam',args.bam, '--fasta', args.fasta, '--pwm', args.pwm, - '--out',args.out,'--cores',args.cores])) + '--out',args.out,'--cores',args.cores]))) vprocess_args = parser.parse_args(['vprocess','--sizes',args.out+'.nuc_dist.txt','--out',args.out]) nuc_args_list = ['nuc','--bed',args.bed,'--bam',args.bam,'--out',args.out,'--cores', str(args.cores), '--occ_track', args.out + '.occ.bedgraph.gz','--vmat', args.out + '.VMat', diff --git a/nucleoatac/diff_occ.py b/nucleoatac/diff_occ.py index 4bedf0d..9033ea7 100644 --- a/nucleoatac/diff_occ.py +++ b/nucleoatac/diff_occ.py @@ -29,7 +29,7 @@ def _diffHelper(arg): occ.occ, [occ.peaks[i] for i in sorted(occ.peaks.keys())]) occ.removeData() except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -43,7 +43,7 @@ def _writeDiff(pos_queue, out): for pos in poslist: pos.write(out_handle) pos_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing occupancy track\n') traceback.print_exc() print() @@ -57,7 +57,7 @@ def run_diff(args, bases = 500000): """ chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down)) + 
chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down)) chunks.merge() maxQueueSize = max(2,int(100 * bases / np.mean([chunk.length() for chunk in chunks]))) #get fragmentsizes @@ -78,7 +78,7 @@ def run_diff(args, bases = 500000): diff_process.start() nuc_dist = np.zeros(args.upper) for j in sets: - tmp = pool1.map(_occHelper, zip(j,itertools.repeat(params))) + tmp = pool1.map(_occHelper, list(zip(j,itertools.repeat(params)))) for result in tmp: diff_queue.put(result[1]) pool1.close() diff --git a/nucleoatac/merge.py b/nucleoatac/merge.py index bed3e8b..0c5e61a 100644 --- a/nucleoatac/merge.py +++ b/nucleoatac/merge.py @@ -23,7 +23,7 @@ def __init__(self, chrom, start, end, occ, occ_lower, occ_upper, reads, source): self.reads = reads self.source = source def asBed(self): - out = "\t".join(map(str,[self.chrom,self.start,self.end,self.occ,self.occ_lower,self.occ_upper,self.reads, self.source])) + out = "\t".join(map(str,[self.chrom,self.start,self.end,self.occ,self.occ_lower,self.occ_upper,self.reads, self.source])) return out def write(self, handle): """write bed line for peak""" @@ -36,7 +36,7 @@ def __init__(self, *args): def read(bedfile, source, min_occ = 0): """Make a list of chunks from a tab-delimited bedfile""" if bedfile[-3:] == '.gz': - infile = gzip.open(bedfile,"r") + infile = gzip.open(bedfile,"rt") else: infile = open(bedfile,"r") out = NucList() @@ -103,7 +103,7 @@ def run_merge(args): pysam.tabix_compress(args.out + '.nucmap_combined.bed', args.out + '.nucmap_combined.bed.gz',force = True) shell_command('rm ' + args.out + '.nucmap_combined.bed') pysam.tabix_index(args.out + '.nucmap_combined.bed.gz', preset = "bed", force = True) - + diff --git a/nucleoatac/multinomial_cov.pyx b/nucleoatac/multinomial_cov.pyx index 20b434e..1bfc6c2 100644 --- a/nucleoatac/multinomial_cov.pyx +++ b/nucleoatac/multinomial_cov.pyx @@ -5,23 +5,19 @@ Created on Mon May 12 14:40:53 2014 @author: alicia """ -from __future__ import division import numpy as np cimport numpy as np cimport cython - -DTYPE = np.float ctypedef np.float_t DTYPE_t - - @cython.boundscheck(False) def calculateCov(np.ndarray[DTYPE_t, ndim=1] p, np.ndarray[DTYPE_t, ndim=1] v, int r): if p.shape[0]!= v.shape[0]: raise ValueError("p and v must be same shape") cdef DTYPE_t value cdef unsigned int i, j + value = 0 for i in range(p.shape[0]): for j in range(i,p.shape[0]): if i==j: diff --git a/nucleoatac/run_nfr.py b/nucleoatac/run_nfr.py index 9d48971..e069f53 100644 --- a/nucleoatac/run_nfr.py +++ b/nucleoatac/run_nfr.py @@ -31,7 +31,7 @@ def _nfrHelper(arg): out = nfr.nfrs nfr.removeData() except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -45,7 +45,7 @@ def _writeNFR(pos_queue, out): for pos in poslist: pos.write(out_handle) pos_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing occupancy track\n') traceback.print_exc() print() @@ -59,7 +59,7 @@ def _writeIns(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -104,7 +104,7 @@ def run_nfr(args): ins_process = mp.Process(target = _writeIns, args=(ins_queue, args.out)) ins_process.start() for j in sets: - tmp = 
pool1.map(_nfrHelper, zip(j,itertools.repeat(params))) + tmp = pool1.map(_nfrHelper, list(zip(j,itertools.repeat(params)))) for result in tmp: if params.ins_track is None: nfr_queue.put(result[0]) diff --git a/nucleoatac/run_nuc.py b/nucleoatac/run_nuc.py index 8eb5b55..e19c49b 100644 --- a/nucleoatac/run_nuc.py +++ b/nucleoatac/run_nuc.py @@ -32,7 +32,7 @@ def _nucHelper(arg): 'nucleoatac_signal.smooth' : nuc.smoothed} nuc.removeData() except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -46,7 +46,7 @@ def _writeNucSig(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing NucleoATAC signal track\n') traceback.print_exc() print() @@ -61,7 +61,7 @@ def _writeBackground(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing NucleoATAC background track\n') traceback.print_exc() print() @@ -76,7 +76,7 @@ def _writeSmooth(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing smoothed NucleoATAC signal track\n') traceback.print_exc() print() @@ -90,7 +90,7 @@ def _writeRaw(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing un-normalized NucleoATAC signal track\n') traceback.print_exc() print() @@ -107,7 +107,7 @@ def _writeNucPos(pos_queue, out): for pos in poslist: pos.write(out_handle) pos_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing nucleosome position file\n') traceback.print_exc() print() @@ -122,7 +122,7 @@ def _writeNucPosRedundant(pos_queue, out): for pos in poslist: pos.write(out_handle) pos_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing redundant nucleosome position file\n') traceback.print_exc() print() @@ -148,8 +148,8 @@ def run_nuc(args): else: chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = vmat.mat.shape[1] + vmat.upper/2 + max(pwm.up,pwm.down) + args.nuc_sep/2, min_length = args.nuc_sep * 2) - chunks.slop(chrs, up = args.nuc_sep/2, down = args.nuc_sep/2) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = vmat.mat.shape[1] + vmat.upper//2 + max(pwm.up,pwm.down) + args.nuc_sep//2, min_length = args.nuc_sep * 2) + chunks.slop(chrs, up = args.nuc_sep//2, down = args.nuc_sep//2) chunks.merge() maxQueueSize = args.cores*10 if args.sizes is not None: @@ -181,7 +181,7 @@ def run_nuc(args): write_processes[i] = mp.Process(target = _writeFuncs[i], args=(write_queues[i], args.out)) write_processes[i].start() for j in sets: - tmp = pool1.map(_nucHelper, zip(j,itertools.repeat(params))) + tmp = pool1.map(_nucHelper, list(zip(j,itertools.repeat(params)))) for result in tmp: for i in outputs: write_queues[i].put(result[i]) diff --git a/nucleoatac/run_occ.py b/nucleoatac/run_occ.py index 9b595d9..78b51de 100644 --- a/nucleoatac/run_occ.py +++ 
b/nucleoatac/run_occ.py @@ -32,7 +32,7 @@ def _occHelper(arg): occ.occ, [occ.peaks[i] for i in sorted(occ.peaks.keys())]) occ.removeData() except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -48,7 +48,7 @@ def _writeOcc(track_queue, out): track.write_track(out_handle2, vals = track.smoothed_lower) track.write_track(out_handle3, vals = track.smoothed_upper) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing occupancy track\n') traceback.print_exc() print() @@ -66,7 +66,7 @@ def _writePeaks(pos_queue, out): for pos in poslist: pos.write(out_handle) pos_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing occupancy track\n') traceback.print_exc() print() @@ -83,8 +83,8 @@ def run_occ(args): else: chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down) + args.nuc_sep/2) - chunks.slop(chrs, up = args.nuc_sep/2, down = args.nuc_sep/2) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down) + args.nuc_sep//2) + chunks.slop(chrs, up = args.nuc_sep//2, down = args.nuc_sep//2) chunks.merge() maxQueueSize = args.cores*10 fragment_dist = FragmentMixDistribution(0, upper = args.upper) @@ -94,7 +94,7 @@ def run_occ(args): else: fragment_dist.getFragmentSizes(args.bam, chunks) fragment_dist.modelNFR() - fragment_dist.plotFits(args.out + '.occ_fit.eps') + fragment_dist.plotFits(args.out + '.occ_fit.pdf') fragment_dist.fragmentsizes.save(args.out + '.fragmentsizes.txt') params = OccupancyParameters(fragment_dist, args.upper, args.fasta, args.pwm, sep = args.nuc_sep, min_occ = args.min_occ, flank = args.flank, bam = args.bam, ci = args.confidence_interval, step = args.step) @@ -115,8 +115,9 @@ def run_occ(args): peaks_process = mp.Process(target = _writePeaks, args=(peaks_queue, args.out)) peaks_process.start() nuc_dist = np.zeros(args.upper) + for j in sets: - tmp = pool1.map(_occHelper, zip(j,itertools.repeat(params))) + tmp = pool1.map(_occHelper, list(zip(j,itertools.repeat(params)))) for result in tmp: nuc_dist += result[0] write_queue.put(result[1]) @@ -130,6 +131,7 @@ def run_occ(args): pysam.tabix_compress(args.out + '.occpeaks.bed', args.out + '.occpeaks.bed.gz',force = True) shell_command('rm ' + args.out + '.occpeaks.bed') pysam.tabix_index(args.out + '.occpeaks.bed.gz', preset = "bed", force = True) + for i in ('occ','occ.lower_bound','occ.upper_bound'): pysam.tabix_compress(args.out + '.' + i + '.bedgraph', args.out + '.'+i+'.bedgraph.gz',force = True) shell_command('rm ' + args.out + '.' 
+ i + '.bedgraph') @@ -138,13 +140,13 @@ def run_occ(args): dist_out = FragmentSizes(0, args.upper, vals = nuc_dist) dist_out.save(args.out + '.nuc_dist.txt') - print "Making figure" + print("Making figure") #make figure fig = plt.figure() - plt.plot(range(0,args.upper),dist_out.get(0,args.upper),label = "Nucleosome Distribution") + plt.plot(list(range(0,args.upper)),dist_out.get(0,args.upper),label = "Nucleosome Distribution") plt.xlabel("Fragment Size") plt.ylabel("Frequency") - fig.savefig(args.out+'.nuc_dist.eps') + fig.savefig(args.out+'.nuc_dist.pdf') plt.close(fig) diff --git a/nucleoatac/run_vprocess.py b/nucleoatac/run_vprocess.py index 401f12e..7f81642 100644 --- a/nucleoatac/run_vprocess.py +++ b/nucleoatac/run_vprocess.py @@ -33,13 +33,13 @@ def run_vprocess(args): #Make extra plots if requeted if args.plot_extra: vmat.autoCorr() - vmat.plot_auto(args.out+'.vplot.Autocorr.eps') + vmat.plot_auto(args.out+'.vplot.Autocorr.pdf') vmat.converto1d() - vmat.plot_1d(args.out+'.vplot.InsertionProfile.eps') - vmat.plot_insertsize(args.out+'.vplot.InsertSizes.eps') + vmat.plot_1d(args.out+'.vplot.InsertionProfile.pdf') + vmat.plot_insertsize(args.out+'.vplot.InsertSizes.pdf') #make plot and save vmat.save(args.out+".VMat") - vmat.plot(filename = args.out+".VMat.eps") + vmat.plot(filename = args.out+".VMat.pdf") diff --git a/pyatac/VMat.py b/pyatac/VMat.py index 2c6be8a..4cbb270 100644 --- a/pyatac/VMat.py +++ b/pyatac/VMat.py @@ -34,7 +34,8 @@ def __init__(self, mat, lower, upper): self.mat = mat self.upper = upper self.lower = lower - self.w = mat.shape[1]/2 + self.w = mat.shape[1]//2 + def trim(self,lower,upper,w): """reduce the size of the vplot @@ -108,20 +109,20 @@ def norm_y(self,dist): def converto1d(self): """convert the 2d matrix to a 1d representation of insertions""" self.one_d = np.zeros(self.upper + self.upper%2 +2*self.w+1) - center = self.upper/2 + self.w + center = self.upper//2 + self.w for j in range(self.mat.shape[0]): for i in range(self.mat.shape[1]): ilen=j+self.lower val = copy(self.mat[j,i]) if ilen%2==0: - self.one_d[center-(self.w-i)-(ilen/2)]+= val - self.one_d[center-(self.w-i)+(ilen/2)]+= val + self.one_d[center-(self.w-i)-(ilen//2)]+= val + self.one_d[center-(self.w-i)+(ilen//2)]+= val else: - self.one_d[center-(self.w-i)-(ilen/2)]+= val * 0.5 - self.one_d[center-(self.w-i)+(ilen/2)]+= val * 0.5 - self.one_d[center-(self.w-i)-(ilen/2+1)]+= val * 0.5 - self.one_d[center-(self.w-i)+(ilen/2+1)]+= val * 0.5 - self.one_d = self.one_d / sum(self.one_d) + self.one_d[center-(self.w-i)-(ilen//2)]+= val * 0.5 + self.one_d[center-(self.w-i)+(ilen//2)]+= val * 0.5 + self.one_d[center-(self.w-i)-(ilen//2+1)]+= val * 0.5 + self.one_d[center-(self.w-i)+(ilen//2+1)]+= val * 0.5 + self.one_d = self.one_d / np.sum(self.one_d) def plot(self, mat=None, title=None, filename=None): """Plot current main matrix or specified matrix (of same dimensions)""" if mat is None: @@ -142,13 +143,14 @@ def plot(self, mat=None, title=None, filename=None): plt.close(fig) else: fig.show() + def plot_1d(self,filename=None): """plot the 1d insertion representation of the matrix""" fig = plt.figure() - xlim = len(self.one_d)/2 - plt.plot(range(-xlim,xlim+1),self.one_d) - plt.vlines(-73,0,max(self.one_d)*1.1,linestyles='dashed') - plt.vlines(73,0,max(self.one_d)*1.1,linestyles='dashed') + xlim = len(self.one_d)//2 + plt.plot(list(range(-xlim,xlim+1)),self.one_d) + plt.vlines(-73,0,np.max(self.one_d)*1.1,linestyles='dashed') + plt.vlines(73,0,np.max(self.one_d)*1.1,linestyles='dashed') 
plt.xlabel("Position relative to dyad") plt.ylabel("Insertion Frequency") if filename: @@ -159,12 +161,13 @@ def plot_1d(self,filename=None): np.savetxt(filename2,self.one_d,delimiter="\t") else: fig.show() + def plot_insertsize(self,filename=None): """plot the insert size disribution in the main matrix""" fig = plt.figure() ins = np.sum(self.mat,axis=1) - ins = ins/sum(ins) - plt.plot(range(self.lower,self.upper),ins) + ins = ins / np.sum(ins) + plt.plot(list(range(self.lower,self.upper)),ins) plt.xlabel("Insert Size") plt.ylabel("Frequency") if filename: @@ -208,7 +211,7 @@ def open(filename): elif state == 'upper': upper = int(line.strip('\n')) elif state == 'mat': - mat.append(map(float,line.strip('\n').split('\t'))) + mat.append(list(map(float,line.strip('\n').split('\t')))) try: new = VMat(np.array(mat), lower, upper) except NameError: diff --git a/pyatac/__init__.py b/pyatac/__init__.py index def3758..55aebd7 100644 --- a/pyatac/__init__.py +++ b/pyatac/__init__.py @@ -1,4 +1,3 @@ +__version__ = '0.4.0' + -#Define version based on setup script -import pkg_resources -__version__ = pkg_resources.require("NucleoATAC")[0].version diff --git a/pyatac/bias.py b/pyatac/bias.py index df2bcf0..6b36827 100644 --- a/pyatac/bias.py +++ b/pyatac/bias.py @@ -66,7 +66,7 @@ def open(name): elif state == 'nucleotides': nucleotides = line.strip('\n').split() elif state == 'mat': - mat.append(map(float,line.strip('\n').split('\t'))) + mat.append(list(map(float,line.strip('\n').split('\t')))) infile.close() try: new = PWM(np.array(mat), up, down, nucleotides) diff --git a/pyatac/chunk.py b/pyatac/chunk.py index c36fe8b..eb1ebfe 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -38,12 +38,13 @@ def slop(self, chromDict, up = 0, down = 0, new = False): else: self.start = newStart self.end = newEnd + def center(self, new = False): if self.strand == "-": - newEnd = self.end - (self.length()/2) + newEnd = self.end - (self.length()//2) newStart = newEnd - 1 else: - newStart = self.start + (self.length()/2) + newStart = self.start + (self.length()//2) newEnd = newStart +1 if new: out = Chunk(self.chrom, newStart, newEnd, @@ -92,12 +93,15 @@ def insert(self, *args): list.insert(self, args[0], args[1]) else: raise ValueError("Expecting Chunk") + def sort(self): """sort regions""" - list.sort(self, cmp = _chunkCompare) + list.sort(self, key=lambda chunk: [chunk.chrom, chunk.start]) + def isSorted(self): """check that regions are sorted""" - return all([_chunkCompare(self[i],self[i+1])==-1 for i in xrange(len(self)-1)]) + return all([_chunkCompare(self[i],self[i+1])==-1 for i in range(len(self)-1)]) + def slop(self, chromDict, up = 0, down = 0, new = False): out = ChunkList() for i in self: @@ -134,7 +138,7 @@ def read(bedfile, weight_col=None, strand_col = None, name_col = None, chromDict min_offset = None, min_length = 1, chrom_source = "FASTA file"): """Make a list of chunks from a tab-delimited bedfile""" if bedfile[-3:] == '.gz': - infile = gzip.open(bedfile,"r") + infile = gzip.open(bedfile,"rt") else: infile = open(bedfile,"r") out = ChunkList() @@ -154,7 +158,7 @@ def read(bedfile, weight_col=None, strand_col = None, name_col = None, chromDict start = int(in_line[1]) end = int(in_line[2]) chrom = in_line[0] - if chromDict is not None and chrom not in chromDict.keys(): + if chromDict is not None and chrom not in list(chromDict.keys()): bad_chroms.append(chrom) continue if min_offset: @@ -169,7 +173,7 @@ def read(bedfile, weight_col=None, strand_col = None, name_col = None, chromDict if chromDict is not 
None and len(bad_chroms)>0: bad_chroms = set(bad_chroms) warn_message = (str(len(bad_chroms)) + " chromosome names in bed file not included in " + - chrom_source + ":\n" + "\n".join(bad_chroms) + "\n " + + chrom_source + ":\n" + "\n".join(bad_chroms) + "\n " + "These regions will be ignored in subsequent analysis") warnings.warn(warn_message) return out @@ -183,7 +187,7 @@ def convertChromSizes(chromDict, splitsize = None, offset = 0): out = ChunkList() for chrom in chrs: out.extend(ChunkList(*(Chunk(chrom, i, min(i + splitsize, chromDict[chrom] - offset)) - for i in xrange(offset, chromDict[chrom] - offset, splitsize)))) + for i in range(offset, chromDict[chrom] - offset, splitsize)))) return out def split(self, bases = None, items = None): """splits list of chunks into set of sublists""" @@ -202,7 +206,7 @@ def split(self, bases = None, items = None): out.append(self[i:(k+1)]) return out elif items is not None: - out = [ self[i:i+items] for i in xrange(0,len(self),items)] + out = [ self[i:i+items] for i in range(0,len(self),items)] return out else: raise Exception("Need to provide items or bases argument!") diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 0ffdc7c..7f5b2a8 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -3,8 +3,7 @@ import matplotlib.pyplot as plt import matplotlib.cm as cm from pyatac.tracks import InsertionTrack -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) -from fragments import makeFragmentMat +from pyatac.fragments import makeFragmentMat class ChunkMat2D: """Class that stores fragments in 2D matrix according to @@ -18,6 +17,7 @@ def __init__(self, chrom, start, end, lower, upper): self.ncol = end - start self.nrow = upper - lower self.mat = np.zeros((self.nrow, self.ncol)) + def get(self, lower = None, upper = None, start = None, end = None, flip = False): """get a subset (or all) of the matrix stored by ChunkMat2D object based on chromosal position and/or insert size""" @@ -52,15 +52,18 @@ def get(self, lower = None, upper = None, start = None, end = None, flip = False else: new[j,:] = self.mat[j,(x1-1):(x2)][::-1][1:] return new + def assign(self, mat): """assign a matrix to object""" if mat.shape != self.mat.shape: raise Exception("Dimensions of input mat are wrong. Uh oh!") self.mat = mat + def save(self, filename): """Save object in a text file""" head = ",".join(map(str,[self.chrom,self.start,self.end,self.lower,self.upper])) np.savetxt(filename,self.mat,delimiter="\t", header = head) + @staticmethod def open(filename): f = open(filename,'r') @@ -71,15 +74,16 @@ def open(filename): new= ChunkMat2D(elements[0],elements[1],elements[2],elements[3]) new.assign(mat) return new + def getIns(self): """Collape matrix into insertions. 
Will reduce span on chromosome""" pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) - mid = self.upper/2 + mid = self.upper//2 for i in range(self.lower,self.upper): - pattern[i-self.lower,mid+(i-1)/2]=1 - pattern[i-self.lower,mid-(i/2)]=1 + pattern[i-self.lower, mid+(i-1)//2]=1 + pattern[i-self.lower, mid-(i//2)]=1 ins = signal.correlate2d(self.mat,pattern,mode="valid")[0] - insertion_track = InsertionTrack(self.chrom,self.start + pattern.shape[1]/2, self.end - (pattern.shape[1]/2)) + insertion_track = InsertionTrack(self.chrom,self.start + pattern.shape[1]//2, self.end - (pattern.shape[1]//2)) insertion_track.assign_track(ins) return insertion_track def plot(self, filename = None, title = None, lower = None, @@ -117,7 +121,7 @@ def __init__(self, chrom, start, end, lower, upper, atac = True): def updateMat(self, fragment): row = fragment.insert - self.lower if self.mode == "centers": - col = (fragment.insert-1)/2 + fragment.left - self.start + col = (fragment.insert-1)//2 + fragment.left - self.start if col>=0 and col<self.ncol and row<self.nrow and row>=0: self.mat[row, col] += 1 else: @@ -137,20 +141,24 @@ class BiasMat2D(ChunkMat2D): def __init__(self, chrom, start, end, lower, upper): ChunkMat2D.__init__(self,chrom, start, end, lower, upper) self.mat = np.ones(self.mat.shape) + def makeBiasMat(self, bias_track): """Make 2D matrix representing sequence bias preferences""" - offset = self.upper/2 + offset = self.upper//2 bias = bias_track.get(self.start-offset,self.end+offset) if not bias_track.log: nonzero = np.where(bias !=0)[0] bias = np.log(bias + min(bias[nonzero])) + pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) - mid = self.upper/2 + mid = self.upper//2 for i in range(self.lower,self.upper): - pattern[i-self.lower,mid+(i-1)/2]=1 - pattern[i-self.lower,mid-(i/2)]=1 + pattern[i-self.lower,mid+(i-1)//2]=1 + pattern[i-self.lower,mid-(i//2)]=1 + for i in range(self.upper-self.lower): self.mat[i]=np.exp(np.convolve(bias,pattern[i,:],mode='valid')) + def normByInsertDist(self, insertsizes): inserts = insertsizes.get(self.lower,self.upper) self.mat = self.mat * np.reshape(np.tile(inserts,self.mat.shape[1]),self.mat.shape,order="F") diff --git a/pyatac/fragments.pyx b/pyatac/fragments.py similarity index 63% rename from pyatac/fragments.pyx rename to pyatac/fragments.py index a463206..379db1e 100644 --- a/pyatac/fragments.pyx +++ b/pyatac/fragments.py @@ -1,26 +1,21 @@ #### Import needed modules ##### import pyatac.seq as seq import numpy as np -cimport numpy as np -cimport cython -from pysam.libcalignmentfile cimport AlignmentFile, AlignedSegment -from pysam.libcfaidx cimport FastaFile +from pysam import AlignmentFile, AlignedSegment +from pysam import FastaFile DTYPE = np.float -ctypedef np.float_t DTYPE_t - #### Import needed modules ##### #import pysam -@cython.boundscheck(False) -def makeFragmentMat(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): - cdef int nrow = upper - lower - cdef int ncol = end - start - cdef np.ndarray[DTYPE_t, ndim=2] mat = np.zeros( (nrow, ncol), dtype = DTYPE) - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int l_pos, ilen, row, col +def makeFragmentMat(bamfile, chrom, start, end, lower, upper, atac = 1): + nrow = upper - lower + ncol = end - start + mat = np.zeros( (nrow, ncol), dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) + + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): if read.is_proper_pair and not read.is_reverse: if atac: @@
-33,19 +28,16 @@ def makeFragmentMat(str bamfile, str chrom, int start, int end, int lower, int u l_pos = read.pos ilen = abs(read.template_length) row = ilen - lower - col = (ilen-1)/2 + l_pos - start + col = (ilen-1)//2 + l_pos - start if col >= 0 and col < ncol and row < nrow and row >= 0: mat[row, col] += 1 bamHandle.close() return mat -@cython.boundscheck(False) -def getInsertions(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): - cdef int npos = end - start - cdef np.ndarray[DTYPE_t, ndim=1] mat = np.zeros(npos, dtype = DTYPE) - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int l_pos, ilen, r_pos +def getInsertions(bamfile, chrom, start, end, lower, upper, atac = 1): + npos = end - start + mat = np.zeros(npos, dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): if read.is_proper_pair and not read.is_reverse: if atac: @@ -66,15 +58,11 @@ def getInsertions(str bamfile, str chrom, int start, int end, int lower, int upp bamHandle.close() return mat - -@cython.boundscheck(False) -def getStrandedInsertions(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): - cdef int npos = end - start - cdef np.ndarray[DTYPE_t, ndim=1] matplus = np.zeros(npos, dtype = DTYPE) - cdef np.ndarray[DTYPE_t, ndim=1] matminus = np.zeros(npos, dtype = DTYPE) - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int l_pos, ilen, r_pos +def getStrandedInsertions(bamfile, chrom, start, end, lower, upper, atac = 1): + npos = end - start + matplus = np.zeros(npos, dtype = DTYPE) + matminus = np.zeros(npos, dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): if read.is_proper_pair and not read.is_reverse: if atac: @@ -95,15 +83,10 @@ def getStrandedInsertions(str bamfile, str chrom, int start, int end, int lower, bamHandle.close() return (matplus, matminus) - - -@cython.boundscheck(False) -def getAllFragmentSizes(str bamfile, int lower, int upper, int atac = 1): - cdef np.ndarray[DTYPE_t, ndim =1] sizes = np.zeros(upper - lower, dtype= np.float) +def getAllFragmentSizes(bamfile, lower, upper, atac = 1): + sizes = np.zeros(upper - lower, dtype= np.float) # loop over samfile - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int ilen + bamHandle = AlignmentFile(bamfile) for read in bamHandle: if read.is_proper_pair and not read.is_reverse: if atac: @@ -118,14 +101,10 @@ def getAllFragmentSizes(str bamfile, int lower, int upper, int atac = 1): bamHandle.close() return sizes - -@cython.boundscheck(False) -def getFragmentSizesFromChunkList(chunks, str bamfile, int lower, int upper, int atac = 1): - cdef np.ndarray[DTYPE_t, ndim =1] sizes = np.zeros(upper - lower, dtype= np.float) +def getFragmentSizesFromChunkList(chunks, bamfile, lower, upper, atac = 1): + sizes = np.zeros(upper - lower, dtype= np.float) # loop over samfile - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int ilen, l_pos, center + bamHandle = AlignmentFile(bamfile) for chunk in chunks: for read in bamHandle.fetch(chunk.chrom, max(0, chunk.start - upper), chunk.end + upper): if read.is_proper_pair and not read.is_reverse: @@ -138,9 +117,9 @@ def getFragmentSizesFromChunkList(chunks, str bamfile, int lower, int upper, int else: l_pos = read.pos ilen = abs(read.template_length) - center = l_pos + 
(ilen-1)/2 + center = l_pos + (ilen-1) // 2 if ilen < upper and ilen >= lower and center >= chunk.start and center < chunk.end: - sizes[ilen - lower]+=1 + sizes[ilen - lower] += 1 bamHandle.close() return sizes diff --git a/pyatac/fragments.pyxbld b/pyatac/fragments.pyxbld deleted file mode 100644 index 51d39b4..0000000 --- a/pyatac/fragments.pyxbld +++ /dev/null @@ -1,10 +0,0 @@ - -def make_ext(modname, pyxfilename): - from distutils.extension import Extension - import pysam - return Extension(name=modname, - sources=[pyxfilename], - extra_link_args=pysam.get_libraries(), - include_dirs=pysam.get_include(), - define_macros=pysam.get_defines()) - diff --git a/pyatac/fragmentsizes.py b/pyatac/fragmentsizes.py index 4fd6ba0..10fa88e 100644 --- a/pyatac/fragmentsizes.py +++ b/pyatac/fragmentsizes.py @@ -5,13 +5,8 @@ """ import numpy as np -import pyximport -pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragments import getAllFragmentSizes, getFragmentSizesFromChunkList - - - class FragmentSizes: """Class for storing fragment size distribution""" def __init__(self, lower, upper, atac = True, vals = None): @@ -71,7 +66,7 @@ def open(filename): elif state == 'upper': upper = int(line.strip('\n')) elif state == 'sizes': - fragmentsizes = np.array(map(float,line.rstrip("\n").split("\t"))) + fragmentsizes = np.array(list(map(float,line.rstrip("\n").split("\t")))) try: new = FragmentSizes(lower, upper, vals = fragmentsizes) except NameError: diff --git a/pyatac/get_cov.py b/pyatac/get_cov.py index 29a6fe2..970ff80 100644 --- a/pyatac/get_cov.py +++ b/pyatac/get_cov.py @@ -12,7 +12,6 @@ import numpy as np import pysam import traceback -import pyximport; pyximport.install() from pyatac.tracks import CoverageTrack from pyatac.chunk import ChunkList from pyatac.utils import shell_command, read_chrom_sizes_from_bam @@ -22,14 +21,14 @@ def _covHelper(arg): """Computes coverage track for a particular set of bed regions""" (chunk, args) = arg try: - offset = args.window / 2 - mat = FragmentMat2D(chunk.chrom,chunk.start - offset, chunk.end + offset, args.lower, args.upper, args.atac) + offset = args.window // 2 + mat = FragmentMat2D(chunk.chrom,chunk.start - offset, chunk.end + offset, args.lower, args.upper, args.atac) mat.makeFragmentMat(args.bam) cov = CoverageTrack(chunk.chrom, chunk.start, chunk.end) cov.calculateCoverage(mat, lower = args.lower, upper = args.upper, window_len = args.window) cov.vals *= args.scale / float(args.window) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -42,7 +41,7 @@ def _writeCov(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing coverage track\n') traceback.print_exc() print() @@ -62,7 +61,7 @@ def get_cov(args, bases = 50000, splitsize = 1000): if args.bed is None: chrs = read_chrom_sizes_from_bam(args.bam) chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items=bases//splitsize) else: chunks = ChunkList.read(args.bed) chunks.merge() @@ -75,7 +74,7 @@ def get_cov(args, bases = 50000, splitsize = 1000): write_process = mp.Process(target = _writeCov, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool1.map(_covHelper, 
zip(j,itertools.repeat(args))) + tmp = pool1.map(_covHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_ins.py b/pyatac/get_ins.py index 8d28e96..f993450 100644 --- a/pyatac/get_ins.py +++ b/pyatac/get_ins.py @@ -12,7 +12,6 @@ import numpy as np import pysam import traceback -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.tracks import InsertionTrack from pyatac.chunk import ChunkList from pyatac.utils import shell_command, read_chrom_sizes_from_bam @@ -26,7 +25,7 @@ def _insHelperSmooth(arg): ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) ins.smooth_track(args.smooth, window = "gaussian", mode= 'valid') except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -39,7 +38,7 @@ def _insHelper(arg): ins = InsertionTrack(chunk.chrom, chunk.start, chunk.end) ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -51,7 +50,7 @@ def _writeIns(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -85,9 +84,9 @@ def get_ins(args, bases = 50000, splitsize = 1000): write_process.start() for j in sets: if args.smooth: - tmp = pool1.map(_insHelperSmooth, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelperSmooth, list(zip(j,itertools.repeat(args)))) else: - tmp = pool1.map(_insHelper, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_nucleotide.py b/pyatac/get_nucleotide.py index 2419523..a0cfd25 100644 --- a/pyatac/get_nucleotide.py +++ b/pyatac/get_nucleotide.py @@ -31,7 +31,7 @@ def _nucleotideHelper(arg): mat += submat n += 1 except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+'\n') + print(('Caught exception when processing:\n'+ chunk.asBed()+'\n')) traceback.print_exc() print() raise e @@ -66,7 +66,7 @@ def get_nucleotide(args): params = _NucleotideParameters(args.up, args.down, args.fasta, args.dinucleotide) sets = chunks.split(bases = 10000) pool = Pool(processes = args.cores) - tmp = pool.map(_nucleotideHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_nucleotideHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = np.zeros(params.matsize) diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index bdfc534..af580d8 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -13,8 +13,6 @@ import pyatac.seq as seq from pyatac.chunk import ChunkList from pyatac.bias import PWM -import pyximport -pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.tracks import InsertionTrack from pyatac.utils import read_chrom_sizes_from_fasta @@ -34,7 +32,7 @@ def _pwmHelper(arg): mat += ins.getStrandedInsertionSequences(params.fasta, params.nucleotides, up = params.up, down = params.down) n += sum(ins.vals) except Exception as e: - print('Caught exception 
when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -62,14 +60,14 @@ def get_pwm(args, bases = 50000, splitsize = 1000): chrs = read_chrom_sizes_from_fasta(args.fasta) if args.bed is None: chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize, offset = args.flank) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank) sets = chunks.split(bases = bases) params = _PWMParameters(bam = args.bam, up = args.flank, down = args.flank, fasta = args.fasta, lower = args.lower, upper = args.upper, atac = args.atac, sym = args.sym) pool = Pool(processes = args.cores) - tmp = pool.map(_pwmHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_pwmHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() n = 0.0 diff --git a/pyatac/get_sizes.py b/pyatac/get_sizes.py index cf66f54..1e040bd 100644 --- a/pyatac/get_sizes.py +++ b/pyatac/get_sizes.py @@ -30,9 +30,9 @@ def get_sizes(args): if not args.no_plot: #make figure fig = plt.figure() - plt.plot(range(sizes.lower,sizes.upper),sizes.get(sizes.lower,sizes.upper),label = args.out) + plt.plot(list(range(sizes.lower,sizes.upper)),sizes.get(sizes.lower,sizes.upper),label = args.out) plt.xlabel("Fragment Size") plt.ylabel("Frequency") - fig.savefig(args.out+'.fragmentsizes.eps') + fig.savefig(args.out+'.fragmentsizes.pdf') plt.close(fig) diff --git a/pyatac/make_bias_track.py b/pyatac/make_bias_track.py index 94e6640..ec777a5 100644 --- a/pyatac/make_bias_track.py +++ b/pyatac/make_bias_track.py @@ -23,7 +23,7 @@ def _biasHelper(arg): bias = InsertionBiasTrack(chunk.chrom, chunk.start, chunk.end) bias.computeBias(params.fasta, params.chrs, params.pwm) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -36,7 +36,7 @@ def _writeBias(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -61,14 +61,16 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): else: args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1]) params = _BiasParams(args.fasta, args.pwm) + if args.bed is None: chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed) - chunks.checkChroms(params.chrs.keys()) + chunks.checkChroms(list(params.chrs.keys())) chunks.merge() sets = chunks.split(bases = bases) + maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) pool = mp.Pool(processes = max(1,args.cores-1)) out_handle = open(args.out + '.Scores.bedgraph','w') @@ -77,7 +79,7 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): write_process = mp.Process(target = _writeBias, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params))) + tmp = pool.map(_biasHelper, list(zip(j,itertools.repeat(params)))) for track in tmp: write_queue.put(track) pool.close() diff --git a/pyatac/make_bias_vplot.py b/pyatac/make_bias_vplot.py index 
ce80274..6e8e8a5 100644 --- a/pyatac/make_bias_vplot.py +++ b/pyatac/make_bias_vplot.py @@ -31,10 +31,9 @@ def _biasVplotHelper(arg): for chunk in chunks: try: chunk.center() - biastrack = InsertionBiasTrack(chunk.chrom, chunk.start - params.flank - 1 - (params.upper/2), - chunk.end + params.flank + params.upper/2+1) + biastrack = InsertionBiasTrack(chunk.chrom, chunk.start-params.flank-1-(params.upper//2), chunk.end+params.flank+params.upper//2+1) if params.bg is not None: - biastrack.read_track(params.bg, empty = 0) + biastrack.read_track(params.bg, empty=0) else: biastrack.computeBias(params.fasta, params.chrs, params.pwm) biasmat = BiasMat2D(chunk.chrom, chunk.start - params.flank - 1, chunk.end + params.flank, @@ -48,7 +47,7 @@ def _biasVplotHelper(arg): else: mat += add except Exception as e: - print('Caught exception when processing:\n' + chunk.asBed() + "\n") + print(('Caught exception when processing:\n' + chunk.asBed() + "\n")) traceback.print_exc() print() raise e @@ -80,19 +79,19 @@ def make_bias_vplot(args): sizes = args.sizes, scale = args.scale, pwm = args.pwm, fasta = args.fasta) pool = Pool(processes = args.cores) - tmp = pool.map(_biasVplotHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_biasVplotHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = sum(tmp) ##Turn matrix into VMat object vmat=VMat(result,args.lower,args.upper) - vmat.plot(filename=args.out+".Bias.Vplot.eps") + vmat.plot(filename=args.out+".Bias.Vplot.pdf") if args.plot_extra: ##get insertion profile represented by vplot vmat.converto1d() - vmat.plot_1d(filename=args.out+'.InsertionProfile.eps') + vmat.plot_1d(filename=args.out+'.InsertionProfile.pdf') #get insert size dstribution represented by vplot - vmat.plot_insertsize(filename= args.out + ".InsertSizes.eps") + vmat.plot_insertsize(filename= args.out + ".InsertSizes.pdf") ##save vmat.save(args.out+".Bias.VMat") diff --git a/pyatac/make_vplot.py b/pyatac/make_vplot.py index edb57a2..90538b3 100644 --- a/pyatac/make_vplot.py +++ b/pyatac/make_vplot.py @@ -8,12 +8,9 @@ # import necessary for python import os import numpy as np -#import matplotlib as mpl -#mpl.use('PS') -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.chunk import ChunkList from pyatac.chunkmat2d import FragmentMat2D -import VMat as V +from . 
 import VMat as V
 from multiprocessing import Pool
 import itertools
 import traceback
@@ -35,7 +32,7 @@ def _vplotHelper(arg):
             add = add/np.sum(add)
             result += add
         except Exception as e:
-            print('Caught exception when processing:\n'+ chunk.asBed()+"\n")
+            print(('Caught exception when processing:\n'+ chunk.asBed()+"\n"))
             traceback.print_exc()
             print()
             raise e
@@ -67,20 +64,20 @@ def make_vplot(args):
     params = _VplotParams(flank = args.flank, lower = args.lower, upper = args.upper,
                           bam = args.bam, atac = args.atac, scale = args.scale)
     pool = Pool(processes = args.cores)
-    tmp = pool.map(_vplotHelper, zip(sets,itertools.repeat(params)))
+    tmp = pool.map(_vplotHelper, list(zip(sets,itertools.repeat(params))))
     pool.close()
     pool.join()
     result = sum(tmp)
     ##Turn matrix into VMat object
     vmat=V.VMat(result,args.lower,args.upper)
     if not args.no_plot:
-        vmat.plot(filename=args.out+".Vplot.eps")
+        vmat.plot(filename=args.out+".Vplot.pdf")
     if args.plot_extra:
         ##get insertion profile represented by vplot
         vmat.converto1d()
-        vmat.plot_1d(filename=args.out+'.InsertionProfile.eps')
+        vmat.plot_1d(filename=args.out+'.InsertionProfile.pdf')
         #get insert size dstribution represented by vplot
-        vmat.plot_insertsize(filename= args.out + ".InsertSizes.eps")
+        vmat.plot_insertsize(filename= args.out + ".InsertSizes.pdf")
     ##save
     vmat.save(args.out+".VMat")
diff --git a/pyatac/seq.py b/pyatac/seq.py
index df825ed..75296b4 100644
--- a/pyatac/seq.py
+++ b/pyatac/seq.py
@@ -22,7 +22,7 @@ def get_sequence(chunk, fastafile):
     return sequence.upper()


-DNA_Translation = string.maketrans('ACGT', 'TGCA')
+DNA_Translation = ''.maketrans('ACGT', 'TGCA')

 def complement(sequence):
     """Get complement of DNA sequenceuence"""
@@ -41,7 +41,7 @@ def seq_to_mat(sequence, nucleotides):
         raise Exception("Usage Error! Nucleotides must all be of same length! No mixing single nucleotides with dinucleotides, etc")
     mat = np.zeros((len(nucleotides),len(sequence)-l+1))
     for i in range(len(nucleotides)):
-        mat[i] = np.array(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)]))
+        mat[i] = np.array(list(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)])))
     return(mat)

 def getNucFreqs(fasta, nucleotides):
diff --git a/pyatac/signal_around_sites.py b/pyatac/signal_around_sites.py
index 67a03bf..1b5e8f2 100644
--- a/pyatac/signal_around_sites.py
+++ b/pyatac/signal_around_sites.py
@@ -62,7 +62,7 @@ def _signalHelper(arg):
             sig[np.isnan(sig)]=0
             agg += sig
         except Exception as e:
-            print('Caught exception when processing:\n' + chunk.asBed() + "\n")
+            print(('Caught exception when processing:\n' + chunk.asBed() + "\n"))
             traceback.print_exc()
             print()
             bg.close()
@@ -96,7 +96,7 @@ def get_signal(args):
                            args.scale, args.positive, args.all)
     sets = chunks.split(items = min(args.cores*20,len(chunks)))
     pool = Pool(processes = args.cores)
-    tmp = pool.map(_signalHelper, zip(sets,itertools.repeat(params)))
+    tmp = pool.map(_signalHelper, list(zip(sets,itertools.repeat(params))))
     pool.close()
     pool.join()
     if args.all:
@@ -110,10 +110,10 @@ def get_signal(args):
     if args.norm:
         result = result / len(chunks)
     fig = plt.figure()
-    plt.plot(range(-args.up,args.down+1),result)
+    plt.plot(list(range(-args.up,args.down+1)),result)
     plt.xlabel("Position relative to Site")
     plt.ylabel("Signal Intensity")
-    fig.savefig(args.out+'.agg.track.eps')
+    fig.savefig(args.out+'.agg.track.pdf')
     plt.close(fig)
     np.savetxt(args.out+'.agg.track.txt',result,delimiter="\t")
diff --git a/pyatac/tracks.py b/pyatac/tracks.py
index d1ac322..9881aae 100644
--- a/pyatac/tracks.py
+++ b/pyatac/tracks.py
@@ -9,8 +9,7 @@ from pyatac.bedgraph import BedGraphFile
 from pyatac.chunk import Chunk
 from pyatac.utils import smooth
-import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()})
-from fragments import getInsertions, getStrandedInsertions
+from pyatac.fragments import getInsertions, getStrandedInsertions
 from pyatac.seq import get_sequence, seq_to_mat, complement


 class Track(Chunk):
@@ -47,7 +46,7 @@ def write_track(self, handle, start = None, end = None, vals = None, write_zero
         if vals is None:
             vals=self.vals
         if len(vals)!=self.end-self.start:
-            print len(vals),self.end-self.start
+            print(len(vals),self.end-self.start)
             raise Exception("Error! Inconsistency between length of \
 values and start/end values")
         prev_value = None
@@ -88,14 +87,14 @@ def read_track(self, bedgraph, start = None, end = None, empty = np.nan, flank =
     def log(self, pseudo = 1):
         """Log values. Add psuedo count so values don't equal 0 before logging"""
         if self.log:
-            print "Logging a track that is already log..."
+            print("Logging a track that is already log...")
         adjusted = self.vals + pseudo
         self.vals = np.log(adjusted)
         self.log = True
     def exp(self):
         """Take exponent of values"""
         if not self.log:
-            print "taking exponent of a non-logged track..."
+            print("taking exponent of a non-logged track...")
         self.vals = np.exp(self.vals)
         self.log = False
     def smooth_track(self, window_len, window='flat', sd = None,
@@ -105,8 +104,8 @@ def smooth_track(self, window_len, window='flat', sd = None,
         self.vals = smooth(self.vals, window_len, window = window, sd = sd,
                            mode = mode, norm = norm)
         if mode == 'valid':
-            self.start = self.start + window_len/2
-            self.end = self.end - window_len/2
+            self.start = self.start + window_len//2
+            self.end = self.end - window_len//2
     def get(self, start = None, end = None, pos = None):
         """Obtain value of track at particular interval or position"""
         if pos:
@@ -135,7 +134,7 @@ def plot(self, name = None, start = None, end = None, filename=None):
             name = self.name
         values = self.get(start,end)
         fig = plt.figure()
-        plt.plot(range(start,end),values)
+        plt.plot(list(range(start,end)),values)
         plt.xlabel(self.chrom)
         plt.ylabel(name)
         if filename:
@@ -143,7 +142,7 @@
             plt.close(fig)
             #Also save text output!
             filename2 = ".".join(filename.split(".")[:-1]+['txt'])
-            out = np.vstack((range(start,end),values))
+            out = np.vstack((list(range(start,end)),values))
             np.savetxt(filename2,out,delimiter="\t")
         else:
             fig.show()
@@ -208,10 +207,9 @@ def __init__(self, chrom, start, end):
         Track.__init__(self, chrom, start, end, "coverage")
     def calculateCoverage(self, mat, lower, upper, window_len):
         """Compute coverage of fragment centers using flat window"""
-        offset=self.start-mat.start-(window_len/2)
+        offset=self.start-mat.start-(window_len//2)
         if offset<0:
-            raise Exception("Insufficient flanking region on \
- mat to calculate coverage with desired window")
+            raise Exception("Insufficient flanking region on mat to calculate coverage with desired window")
         lower=lower-mat.lower
         upper=upper-mat.lower
         if offset!=0:
@@ -224,8 +222,7 @@ def calculateCoverageSmooth(self,mat,lower,upper,window_len,sd):
         """Compute coverage of fragment centers using gaussia window"""
         offset=self.start-mat.start-(window_len/2)
         if offset<0:
-            raise Exception("Insufficient flanking region on \
- mat to calculate coverage with desired window")
+            raise Exception("Insufficient flanking region on mat to calculate coverage with desired window")
         lower=lower-mat.lower
         upper=upper-mat.lower
         if offset!=0:
diff --git a/pyatac/utils.py b/pyatac/utils.py
index d972f4d..9922f54 100644
--- a/pyatac/utils.py
+++ b/pyatac/utils.py
@@ -90,7 +90,7 @@ def call_peaks(sigvals, min_signal = 0, sep = 120, boundary = None, order =1):
         replace = min(sigvals[~np.isnan(sigvals)])
         sigvals[np.isnan(sigvals)]=replace
     if boundary is None:
-        boundary = sep/2
+        boundary = sep//2
     random = np.random.RandomState(seed = 25)
     l = len(sigvals)
     peaks = signal.argrelmax(sigvals *(1+
diff --git a/setup.py b/setup.py
index e654055..f1703fa 100644
--- a/setup.py
+++ b/setup.py
@@ -3,34 +3,29 @@
 import sys
 import os

-if float(sys.version[:3])<2.7 or float(sys.version[:3])>=2.8:
-    sys.stderr.write("CRITICAL: Python version must be 2.7!\n")
-    sys.exit(1)
-
-
 class NoTestCommand(TestCommand):
     def run(self):
         print("NucleoATAC does not support running tests with "
-              "'python setup.py test'. Please run 'python tests.py'")
+              "'python setup.py test'. The test suite can be run via 'pytest'")


 setup(name='NucleoATAC',
-      version='0.3.4',
+      version='0.4.0',
       description='python package for calling nucleosomes with ATAC-Seq',
       classifiers=[
         'Development Status :: 3 - Alpha',
         'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
         'Topic :: Scientific/Engineering :: Bio-Informatics'],
       keywords='ATAC-Seq sequencing bioinformatics',
       url='https://github.com/GreenleafLab/NucleoATAC',
       author='Alicia Schep',
-      author_email='aschep@stanford.edu',
+      author_email='aschep@gmail.com',
       license='MIT',
       packages=['pyatac','pyatac.pwm','nucleoatac','nucleoatac.vplot'],
-      install_requires=['cython >= 0.22','numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib'],
+      python_requires='>=3.8',
+      install_requires=['cython > 0.22', 'numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib'],
       scripts=['bin/pyatac','bin/nucleoatac'],
       include_package_data=True,
       zip_safe=False,
-      tests_require=['nose'],
       cmdclass = {'test': NoTestCommand})
diff --git a/tests.py b/tests.py
deleted file mode 100644
index b020255..0000000
--- a/tests.py
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env python
-
-import matplotlib
-import nose
-matplotlib.use('agg')
-
-nose.main()
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 60b44d0..e1b2a47 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -9,31 +9,31 @@ class NucleoATACTestCase(TestCase):
     """
     def setUp(self):
         self.parser = nucleoatac_parser()
-    def test_run(self):
-        cmd = "nucleoatac run --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/test_results/test --cores 2"
-        args = self.parser.parse_args(cmd.split()[1:])
-        nucleoatac_main(args)
     def test_occ(self):
-        cmd = "nucleoatac occ --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/test_results/test --cores 2"
+        cmd = "nucleoatac occ --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --cores 2"
         args = self.parser.parse_args(cmd.split()[1:])
         nucleoatac_main(args)
     def test_vprocess(self):
-        cmd = "nucleoatac vprocess --out example/test_results/test --sizes example/example_results/example.nuc_dist.txt"
+        cmd = "nucleoatac vprocess --out example/_results/example --sizes example/example_results/example.nuc_dist.txt"
         args = self.parser.parse_args(cmd.split()[1:])
         nucleoatac_main(args)
     def test_nuc(self):
-        cmd = "nucleoatac nuc --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/test_results/test --vmat example/example_results/example.VMat --cores 2"
+        cmd = "nucleoatac nuc --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --vmat example/example_results/example.VMat --cores 2"
         args = self.parser.parse_args(cmd.split()[1:])
         nucleoatac_main(args)
     def test_merge(self):
-        cmd = "nucleoatac merge --occpeaks example/example_results/example.occpeaks.bed.gz --nucpos example/example_results/example.nucpos.bed.gz --out example/test_results/test"
+        cmd = "nucleoatac merge --occpeaks example/example_results/example.occpeaks.bed.gz --nucpos example/example_results/example.nucpos.bed.gz --out example/_results/example"
         args = self.parser.parse_args(cmd.split()[1:])
         nucleoatac_main(args)
     def test_nfr(self):
-        cmd = "nucleoatac nfr --bed example/example.bed --occ_track example/example_results/example.occ.bedgraph.gz --calls example/example_results/example.nucmap_combined.bed.gz --out example/test_results/test --bam example/example.bam --fasta example/sacCer3.fa"
+        cmd = "nucleoatac nfr --bed example/example.bed --occ_track example/example_results/example.occ.bedgraph.gz --calls example/example_results/example.nucmap_combined.bed.gz --out example/_results/example --bam example/example.bam --fasta example/sacCer3.fa"
+        args = self.parser.parse_args(cmd.split()[1:])
+        nucleoatac_main(args)
+    def test_run(self):
+        cmd = "nucleoatac run --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --cores 2"
         args = self.parser.parse_args(cmd.split()[1:])
         nucleoatac_main(args)
-
+
 class PyATACTestCase(TestCase):
     """
     Base TestCase class for pyatac commands
@@ -41,10 +41,10 @@ class PyATACTestCase(TestCase):
     def setUp(self):
         self.parser = pyatac_parser()
     def test_sizes(self):
-        cmd = "pyatac sizes --bam example/example.bam --out example/test_results/test_sizes"
+        cmd = "pyatac sizes --bam example/example.bam --out example/_results/example_sizes"
         args = self.parser.parse_args(cmd.split()[1:])
         pyatac_main(args)
-
+
diff --git a/tests/test_nucleosome_calling.py b/tests/test_nucleosome_calling.py
new file mode 100644
index 0000000..a248424
--- /dev/null
+++ b/tests/test_nucleosome_calling.py
@@ -0,0 +1,62 @@
+from unittest import TestCase
+import numpy as np
+import matplotlib
+matplotlib.use('agg')
+
+from pyatac.utils import read_chrom_sizes_from_fasta
+import nucleoatac.NucleosomeCalling as Nuc
+import pyatac.VMat as V
+from pyatac.chunkmat2d import FragmentMat2D
+from pyatac.chunk import ChunkList
+from pyatac.fragmentsizes import FragmentSizes
+from pyatac.bias import PWM
+
+
+class Test_NuclesomeCalling(TestCase):
+    """class to test nucleosome calling"""
+    def setUp(self):
+        """setup Test_occupancy class by establishing parameters"""
+        bed_list = ChunkList.read('example/example.bed')
+        self.chunk = bed_list[0]
+        self.vmat = V.VMat.open('example/example_results/example.VMat')
+        #chrs = read_chrom_sizes_from_fasta('example/sacCer3.fa')
+        #pwm = PWM.open('Human')
+        #chunks = ChunkList.read(
+        #    'example/example.bed',
+        #    chromDict = chrs,
+        #    min_offset = self.vmat.mat.shape[1] + self.vmat.upper//2 + max(pwm.up,pwm.down) + 120//2,
+        #    min_length = 120 * 2
+        #)
+        #chunks.slop(chrs, up = 120//2, down = 120//2)
+        #chunks.merge()
+        #self.chunk = chunks[0]
+        fragment_dist = FragmentSizes.open('example/example_results/example.fragmentsizes.txt')
+        self.params = Nuc.NucParameters(vmat = self.vmat, fragmentsizes = fragment_dist,
+                                        bam = 'example/example.bam', fasta = 'example/sacCer3.fa', pwm = 'Human',
+                                        occ_track = 'example/example_results/example.occ.bedgraph.gz',
+                                        sd = 25, nonredundant_sep = 120, redundant_sep = 25,
+                                        min_z = 3, min_lr = 0 , atac = True)
+        nuc = Nuc.NucChunk(self.chunk)
+        nuc.process(self.params)
+        self.out = {
+            'nucpos' : [nuc.nuc_collection[i] for i in sorted(nuc.nonredundant)],
+            'nucpos.redundant' : [nuc.nuc_collection[i] for i in sorted(nuc.redundant)],
+            'nucleoatac_signal' : nuc.norm_signal,
+            'nucleoatac_raw' : nuc.nuc_signal,
+            'nucleoatac_background' : nuc.bias,
+            'nucleoatac_signal.smooth' : nuc.smoothed
+        }
+    def test_calling(self):
+        """test nucleosome positions"""
+        self.assertTrue(len(self.out['nucpos']) == 3)
+        self.assertTrue(self.out['nucpos'][0].start == 706773)
+        self.assertTrue(abs(self.out['nucpos'][0].z - 8.2365) < 0.001)
+        self.assertTrue(self.out['nucpos'][0].nfr_cov == 6.0)
+        self.assertTrue(self.out['nucpos'][0].nuc_cov == 54.0)
+        self.assertTrue(abs(self.out['nucpos'][0].nuc_signal - 15.3150) < 0.001)
+        self.assertTrue(abs(self.out['nucpos'][0].norm_signal - 6.4621) < 0.001)
+        self.assertTrue(abs(self.out['nucpos'][0].smoothed - 3.4974) < 0.001)
+        self.assertTrue(abs(self.out['nucpos'][0].lr - 22.3557) < 0.001)
+    def test_signal(self):
+        """test signal output"""
+        self.assertTrue(len(self.out['nucleoatac_signal'].vals) == 1093)
diff --git a/tests/test_occupancy.py b/tests/test_occupancy.py
index f335f87..01eb403 100644
--- a/tests/test_occupancy.py
+++ b/tests/test_occupancy.py
@@ -31,7 +31,7 @@ def test_occupancy_calc3(self):
             a = np.random.multinomial(10,self.fragment_dist.nfr_fit.get())
             b = np.random.multinomial(30,self.fragment_dist.nuc_fit.get())
             results[i],lower[i],upper[i] = calculateOccupancy(a+b,np.array([1,1,1]),self.params)
-        print np.mean(results)
+        self.assertTrue(abs(np.mean(results)-0.75)<0.1)
         self.assertTrue(sum(upper < 0.75) < 85)
         self.assertTrue(sum(lower > 0.75) < 85)
@@ -51,7 +51,7 @@ def test_occupancy_calc4(self):
             a = np.random.multinomial(10,nfrprob)
             b = np.random.multinomial(30,nucprob)
             results[i],lower[i],upper[i] = calculateOccupancy(a+b,bias,self.params)
-        print np.mean(results)
+        self.assertTrue(abs(np.mean(results)-0.75)<0.1)
         self.assertTrue(sum(upper < 0.75) < 85)
         self.assertTrue(sum(lower > 0.75) < 85)