@HWI-XXXXX_0470:1:1:1761:946#0/1 NCCCTTTGGATTTGTCCTTTCAGATGCATTAGCCAT +HWI-XXXXX_0470:1:1:1761:946#0/1 BKNNJMHHEJRTRRR_______________Y[[[Y_
The first line, starting with “@”, and the third, starting with “+”, are descriptive, contain the name, slide number, etc. The second line is the read sequence of nucleotides, in our case there are 36 of them. The fourth line is the probabilities of reading the nucleotides. Below is a link to an exact description of the file format and how to get numbers from the letter representation http://en.wikipedia.org/wiki/FASTQ_format . /usr/src/bowtie-0.12.7/bowtie -q -v 2 -m 1 --best --strata -p 8 -S \ /usr/src/bowtie-0.12.7/indexes/hg19 - | samtools view -Sb - >${DIRNAME}/${SEQNAME}.bam
@HD VN:1.0 SO:unsorted @SQ SN:chr10 LN:135374737 @SQ SN:chr11 LN:134452384 … @PG ID:Bowtie VN:0.12.7 CL:"/usr/src/bowtie-0.12.7/bowtie -q -v 1 -m 1 --best --strata -p 8 -S" USI-EAS21_8_3445_7_1_452_540 0 chr5 39195556 255 23M * 0 0 CTCCAGATTCTCTGGAAAGATGG SSSSSSPSSSSSSSSGNNSJSSL XA:i:0 MD:Z:23 NM:i:0 USI-EAS21_8_3445_7_1_170_242 0 chr14 73509925 255 23M * 0 0 CTGCATTAGACCTAGGCTTAGAA SSSSSSSSSSSSSSSSSSSGSNN XA:i:0 MD:Z:23 NM:i:0 USI-EAS21_8_3445_7_1_290_985 0 chr7 142613233 255 23M * 0 0 AGCTGACTGGCAAGCAACAGAGT JSSSSSSSSSSSPSSGSSLSNSL XA:i:0 MD:Z:23 NM:i:0 USI-EAS21_8_3445_7_1_881_711 4 * 0 0 * * 0 0 ATCGGAAGAGCTCGTATGCCGTC SSSSPSSSSSSSSSSSSSSSSNS XM:i:0 USI-EAS21_8_3445_7_1_897_318 0 chr3 50101427 255 23M * 0 0 GGGCGCAGAACCGCTGCTGCTGC SPSSSSSSSPPSSSSNSPPPLSL XA:i:1 MD:Z:15A7NM:i:1
#ifndef PoissonHandler_H #define PoissonHandler_H #include <config.hpp> class PoissonHandler : public QState { Q_OBJECT private: GenomeDescription *sql_input; GenomeDescription *sam_input; QSettings setup; public: PoissonHandler(GenomeDescription *sql,GenomeDescription *sam,HandledData &output,QState *parent=0); ~PoissonHandler(); protected: virtual void onEntry(QEvent* event); }; #endif //
#include "PoissonHandler.hpp" //------------------------------------------------------------- //------------------------------------------------------------- PoissonHandler::~PoissonHandler() { } //------------------------------------------------------------- //------------------------------------------------------------- PoissonHandler::PoissonHandler(GenomeDescription *sql,GenomeDescription *sam,HandledData &,QState * parent): QState(parent), sql_input(sql), sam_input(sam) { } //------------------------------------------------------------- //------------------------------------------------------------- void PoissonHandler::onEntry(QEvent*) { if(!setup.contains("AvgTagWindow")) setup.setValue("AvgTagWindow",500); if(!setup.contains("siteShift")) setup.setValue("siteShift",75); qint32 window=setup.value("AvgTagWindow").toInt(); qint32 half_window=0;//window/2; qint32 shift=setup.value("siteShift").toInt(); int c=0;//sense count, even/odd c sense/nonsense QMap<QString,int> poisson; QMapIterator<QChar,QMap<QString,QMap<qint32,qint32> > > sense(sql_input->genome); while(sense.hasNext()) { sense.next(); c++; QMapIterator<QString,QMap<qint32,qint32> > dictionary(sense.value()); while(dictionary.hasNext()) { dictionary.next(); QMapIterator<qint32,qint32> s(dictionary.value()); //sql result for strand+chromosome if((sam_input->genome['+']).contains(dictionary.key())) //chromosome chr1,chr2... { QMapIterator<qint32,qint32> k( (sam_input->genome['+'])[dictionary.key()] );//+ strand,sam file, iteration over strand+chromosome s.next(); //c++; k.next(); while (k.hasNext()) // + strand, iteration over strand+chromosome+each position { //if align is inside of the segment of the start site qint32 x1=k.key()- s.key() +half_window +shift; qint32 x2=x1-window; if( (x1^x2) < 0 ) {//inside segment poisson[((sql_input->genome_h[sense.key()])[dictionary.key()])[s.key()]]+=k.value(); k.next(); } else { if((k.key() +shift)>(s.key()+half_window)) { if(!s.hasNext()) break; s.next();// c++; } else { k.next(); } } }//while k.hasNext }//if + strand sam input if((sam_input->genome['-']).contains(dictionary.key())) { QMapIterator<qint32,qint32> j( (sam_input->genome['-'])[dictionary.key()] );//- strand,sam file s.toFront(); s.next(); j.next(); while (j.hasNext()) // - strand { //if align is inside of the segment of the start site qint32 x1=j.key()- s.key() +half_window -shift; qint32 x2=x1-window; if( (x1^x2) < 0 ) {//inside segment poisson[((sql_input->genome_h[sense.key()])[dictionary.key()])[s.key()]]+=j.value(); j.next(); } else { if((j.key() -shift)>(s.key()+half_window)) { if(!s.hasNext()) break; s.next(); } else { j.next(); } } } }//if - strand sam file }//while dictionary }//while sense //Poisson { qreal lamda=(qreal)(window*(sam_input->total-sam_input->notAligned))/(sam_input->tot_len*0.741); QFile _outFile; _outFile.setFileName(setup.value("outFileName").toString()); _outFile.open(QIODevice::WriteOnly|QIODevice::Truncate); qreal exp_l=qExp(-lamda); QString str; foreach(str,poisson.keys()) { //P(x,L)=(e**-1)*(L**x)/x! // qreal pois=exp_l; //qExp(-lamda)*qPow(lamda,poisson[str]); // for(int i=2; i<= poisson[str];i++) // pois=pois*lamda/i; //P(x,L)=(e**-1)*E(1-k)(L**x(i))/x(i)! qreal pois=exp_l; qreal delim=1.0; qreal sum=1.0; for(int i=1; i<= poisson[str];i++) { delim*=lamda/i; sum+=delim; } pois*=sum; _outFile.write( (QString("%1\t%2\t%3\n").arg(str).arg(poisson[str]).arg(pois,0,'e',20)).toLocal8Bit() ); } _outFile.flush(); _outFile.close(); }//if(1) }//end of function //------------------------------------------------------------- //-------------------------------------------------------------
Source: https://habr.com/ru/post/137267/
All Articles