Class: Cheripic::Vcf

Inherits:
Object
  • Object
show all
Defined in:
lib/cheripic/vcf.rb

Class Method Summary collapse

Class Method Details

.filtering(mutant_vcf, bgbulk_vcf) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/cheripic/vcf.rb', line 63

def self.filtering(mutant_vcf, bgbulk_vcf)
  var_pos_mut = get_vars(mutant_vcf)
  return var_pos_mut if bgbulk_vcf == ''
  var_pos_bg = get_vars(bgbulk_vcf)

  # if both bulks have homozygous mutations at same positions then deleting them
  var_pos_mut.each_key do | frag |
    positions = var_pos_mut[frag][:hom].keys
    pos_bg_bulk = var_pos_bg[frag][:hom].keys
    positions.each do |pos|
      if pos_bg_bulk.include?(pos)
        var_pos_mut[frag][:hom].delete(pos)
      end
    end
  end
  var_pos_mut
end

.get_allele_freq(vcf_obj) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/cheripic/vcf.rb', line 12

def self.get_allele_freq(vcf_obj)
  # check if the vcf is from samtools (has DP4 and AF1 fields in INFO)
  if vcf_obj.info.key?('DP4')
    freq = vcf_obj.info['DP4'].split(',')
    depth = freq.inject { | sum, n | sum.to_f + n.to_f }
    alt = freq[2].to_f + freq[3].to_f
    allele_freq = alt / depth
    # allele_freq = vcf_obj.non_ref_allele_freq
  # check if the vcf is from VarScan (has RD, AD and FREQ fields in FORMAT)
  elsif vcf_obj.samples['1'].key?('RD')
    alt = vcf_obj.samples['1']['AD'].to_f
    depth = vcf_obj.samples['1']['RD'].to_f + alt
    allele_freq = alt / depth
  # check if the vcf is from GATK (has AD and GT fields in FORMAT)
  elsif vcf_obj.samples['1'].key?('AD') and vcf_obj.samples['1']['AD'].include?(',')
    freq = vcf_obj.samples['1']['AD'].split(',')
    allele_freq = freq[1].to_f / ( freq[0].to_f + freq[1].to_f )
  # check if the vcf has has AF fields in INFO
  elsif vcf_obj.info.key?('AF')
    allele_freq = vcf_obj.info['AF'].to_f
  else
    raise VcfError.new 'not a supported vcf format (VarScan, GATK, Bcftools(Samtools), Vcf 4.0, 4.1 and 4.2)' +
                           " and check that it is one sample vcf\n"
  end
  allele_freq
end

.get_vars(vcf_file) ⇒ Object

Input: vcf file Ouput: lists of hm and ht SNPS and hash of all fragments with variants



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/cheripic/vcf.rb', line 42

def self.get_vars(vcf_file)
  ht_low = Options.htlow
  ht_high = Options.hthigh

  # hash of :het and :hom with frag ids and respective variant positions
  var_pos = Hash.new{ |h,k| h[k] = Hash.new(&h.default_proc) }
  File.foreach(vcf_file) do |line|
    next if line =~ /^#/
    v = Bio::DB::Vcf.new(line)
    unless v.alt == '.'
      allele_freq = get_allele_freq(v)
      if allele_freq.between?(ht_low, ht_high)
        var_pos[v.chrom][:het][v.pos] = allele_freq
      elsif allele_freq > ht_high
        var_pos[v.chrom][:hom][v.pos] = allele_freq
      end
    end
  end
  var_pos
end