; oligo-analysis -sort -v 1 -return occ,freq,mseq,proba,rank,zscore -l 6 -1str -noov -thosig 0 -thmssig 0 -org Saccharomyces_cerevisiae -bg intergenic -i data/utr3/orthologs/stricto/purged/YAL060W_utr3_500_ortho_purged.fasta -o data/utr3/orthologs/stricto/ORF-wise_oligos/YAL060W_utr3_500_ortho_oligos_6nt-1str-noov_osig0_msig0 ; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. ; Detection of over-represented words ; Oligomer length 6 ; Input file data/utr3/orthologs/stricto/purged/YAL060W_utr3_500_ortho_purged.fasta ; Input format fasta ; Output file data/utr3/orthologs/stricto/ORF-wise_oligos/YAL060W_utr3_500_ortho_oligos_6nt-1str-noov_osig0_msig0 ; Discard overlapping matches ; Counted on a single strand ; Background model intergenic ; Organism Saccharomyces_cerevisiae ; Method Frequency file ; Expected frequency file /Users/jvanheld/rsa-tools/data/genomes/Saccharomyces_cerevisiae/oligo-frequencies/6nt_intergenic_Saccharomyces_cerevisiae-1str.freq ; Sequence type DNA ; Nb of sequences 6 ; Sum of sequence lengths 3000 ; discarded occurrences 85 (contain other letters than ACGT) ; nb possible positions 2885 ; total oligo occurrences 2475 ; total overlapping occurrences 40 ; total non overlapping occ 2435 ; alphabet size 4 ; nb possible oligomers 4096 ; threshold on occ sig 0 ; threshold on matching seq sig 0 ; threshold on occ proba 0.000244140625 ; threshold on match. seq proba 0.000244140625 ; Sequences: ; cerevisiae_SGD_BDH1_YAL060W 500 ; paradoxus_MIT_459_YAL060W 500 ; mikatae_MIT_37_YAL060W 500 ; mikatae_WashU_Contig1893.3_YAL060W 500 ; kudriavzevii_WashU_Contig1874.1_YAL060W 500 ; bayanus_MIT_42_YAL060W 500 ; ; column headers ; 1 seq oligomer sequence ; 2 id oligomer identifier ; 3 observed_freq observed relative frequency ; 4 expected_freq expected relative frequency ; 5 occ observed occurrences ; 6 exp_occ expected occurrences ; 7 occ_P occurrence probability (binomial) ; 8 occ_E E-value for occurrences (binomial) ; 9 occ_sig occurrence significance (binomial) ; 10 zscore z-score (normal) ; 11 ovl_occ number of overlapping occurrences (discarded from the count) ; 12 ms number of matching sequences ; 13 exp_ms expected number of matching sequences ; 14 ms_P matching sequence probability (binomial) ; 15 ms_E E-value for matching sequences (binomial) ; 16 ms_sig matching sequenc significance (binomial) ; 17 ms_freq observed matching sequence frequency ; 18 exp_msf expected matching sequence frequency ; 19 rank rank ;seq id observed_freq expected_freq occ exp_occ occ_P occ_E occ_sig zscore ovl_occ ms exp_ms ms_P ms_E ms_sig ms_freq exp_msf rank gagctg gagctg 0.0044444444444 0.0001099948446 11 0.31 4.9e-14 2.0e-10 9.70 19.16 0 4 0.32 0.00011 0.44 0.4 0.66667 0.05299 1 agctgc agctgc 0.0028282828283 0.0001389408563 7 0.40 2.1e-07 8.7e-04 3.06 10.49 0 5 0.40 7.4e-06 0.03 1.5 0.83333 0.06647 2 tctaag tctaag 0.0028282828283 0.0001934812363 7 0.55 1.9e-06 7.7e-03 2.11 8.68 0 5 0.55 3.5e-05 0.14 0.8 0.83333 0.09134 3 ccaaga ccaaga 0.0028282828283 0.0002340056527 7 0.67 6.5e-06 2.7e-02 1.58 7.75 0 5 0.66 8.5e-05 0.35 0.5 0.83333 0.10939 4 ctaaga ctaaga 0.0028282828283 0.0002376619910 7 0.68 7.2e-06 2.9e-02 1.53 7.68 0 5 0.67 9.2e-05 0.38 0.4 0.83333 0.11100 5 gctgcc gctgcc 0.0016161616162 0.0000898849838 4 0.26 0.00015 6.1e-01 0.21 7.37 0 4 0.26 5e-05 0.21 0.7 0.66667 0.04352 6 acagac acagac 0.0020202020202 0.0001459488381 5 0.42 7.4e-05 3.1e-01 0.52 7.09 0 5 0.42 9.3e-06 0.038 1.4 0.83333 0.06970 7 gaacag gaacag 0.0024242424242 0.0002099347587 6 0.60 3.8e-05 1.6e-01 0.80 6.98 0 5 0.59 5.2e-05 0.21 0.7 0.83333 0.09871 8 gcgatg gcgatg 0.0020202020202 0.0001526521250 5 0.44 9.2e-05 3.8e-01 0.42 6.91 0 5 0.44 1.2e-05 0.047 1.3 0.83333 0.07278 9 gagggg gagggg 0.0016161616162 0.0001011586936 4 0.29 0.00023 9.5e-01 0.02 6.89 0 4 0.29 7.9e-05 0.32 0.5 0.66667 0.04884 10 gcaaag gcaaag 0.0024242424242 0.0002510685649 6 0.72 0.00010 4.2e-01 0.38 6.24 0 5 0.70 0.00012 0.48 0.3 0.83333 0.11688 11 atgagc atgagc 0.0020202020202 0.0001883014237 5 0.54 0.00024 9.9e-01 0.01 6.08 0 5 0.53 3.1e-05 0.13 0.9 0.83333 0.08901 12 ccaata ccaata 0.0024242424242 0.0002781864074 6 0.79 0.00018 7.2e-01 0.14 5.84 0 5 0.77 0.00019 0.77 0.1 0.83333 0.12866 13 gaaagc gaaagc 0.0024242424242 0.0002934211505 6 0.84 0.00023 9.6e-01 0.02 5.64 0 5 0.81 0.00024 0.99 0.0 0.83333 0.13521 14 ; Job started 2004_04_02.113951 ; Job done 2004_04_02.113952