# Examples of user-defined filters. Edit and run with -f filters.txt.
# The examples below are self-explanatory. Notice the use of the predefined
#  variables ($PASS, $FAIL, $MATCH, $RECORD) and methods (error).


# In this example, a minimum value of AF1=0.1 is required
{
    tag  => 'INFO/AF1',                       # The VCF tag to apply this filter on
        name => 'MinAF',                          # The filter ID
        desc => 'Minimum AF1 [0.01]',             # Description for the VCF header
        test => sub { return $MATCH < 0.01 ? $FAIL : $PASS },
},


# Filter all indels (presence of INDEL tag is tested)
{
    tag      => 'INFO/INDEL',
    apply_to => 'indels',         # Can be one of SNPs, indels, all. Default: [All]
    name     => 'Indel',
    desc     => 'INDEL tag present',
    test     => sub { return $FAIL },
},


# Only loci with enough reads supporting the variant will pass the filter
{
    tag      => 'INFO/DP4',
    name     => 'FewAlts',
    desc     => 'Too few reads supporting the variant',
    apply_to => 'SNPs',
    test     => sub {
        if ( !($MATCH =~ /^([^,]+),([^,]+),([^,]+),(.+)$/) )
        {
            error("Could not parse INFO/DP4: $CHROM:$POS [$MATCH]");
        }
        if ( 0.1*($1+$2) > $3+$4  ) { return $PASS; }
        return $FAIL;
    },
},

# Example of filtering based on genotype columns and the QUAL column
{
    tag      => 'FORMAT/PL',
    name     => 'NoHets',
    desc     => 'Inbred homozygous mouse, no hets expected',
    apply_to => 'SNPs',
    test     => sub {
            for my $pl (@$MATCH)
            {
                my @pls = split(/,/,$pl);
                if ( $pls[1]<$pls[0] && $pls[1]<$pls[2] ) { return $FAIL; }
            }
        return $PASS;
    },
},


# This example splits the four PV4 values into four tags names PV0, PV1, PV2 and PV3. 
#   Note the use of the 'header' key, and the $RECORD and $VCF variables.
{
    header   => [ 
                    qq[key=INFO,ID=PV0,Number=1,Type=Float,Description="P-value for strand bias"],
                    qq[key=INFO,ID=PV1,Number=1,Type=Float,Description="P-value for baseQ bias"],
                    qq[key=INFO,ID=PV2,Number=1,Type=Float,Description="P-value for mapQ bias"],
                    qq[key=INFO,ID=PV3,Number=1,Type=Float,Description="P-value for tail distance bias"] 
                ], 
    tag      => 'INFO/PV4',
    name     => 'SplitPV4',
    desc     => 'Split PV4',
    apply_to => 'all',
    test     => sub {
        my @vals = split(/,/,$MATCH);
        $$RECORD[7] = $VCF->add_info_field($$RECORD[7],'PV0'=>$vals[0],'PV1'=>$vals[1],'PV2'=>$vals[2],'PV3'=>$vals[3]);
        return $PASS;
    },
},

# Do whatever you want with every record and edit it according to your needs. This silly
#   example removes the tag SILLY in records where ID is set and depth is bigger than 5.
{
    tag      => 'Dummy',
    test     => sub {
        if ( $$RECORD[2] eq '.' ) { return $PASS; } # Modify only lines with ID
        my $dp = $vcf->get_info_field($$RECORD[7],'DP');
        if ( $dp>5 ) { $$RECORD[7] = $VCF->add_info_field($$RECORD[7],'SILLY'=>undef); }
        return $PASS;
    },
}


# Filter records with the value XY absent or not equal to 42
{
    tag      => 'Dummy',
    header   => [
        qq[key=FILTER,ID=XY,Description="XY not OK"],
    ],
    test     => sub {
        my $xy      = $VCF->get_info_field($$RECORD[7],'XY');
        my $is_bad  = ( !defined $xy or $xy!=42 ) ? 1 : 0;
        $$RECORD[6] = $VCF->add_filter($$RECORD[6],'XY'=>$is_bad);
        return $PASS;
    },
},


# Annotate INFO field with SINGLETON flag when one and only one sample is different from the reference
{
    header   => [ 
        qq[key=INFO,ID=SINGLETON,Number=0,Type=Flag,Description="Only one non-ref sample"],
    ],
    tag      => 'FORMAT/GT',
    name     => 'Dummy',
    desc     => 'Dummy',
    test     => sub {
        my $nalt = 0;
        for my $gt (@$MATCH)
        {
            my @gt = $VCF->split_gt($gt);
            for my $allele (@gt)
            {
                if ( $allele ne 0 && $allele ne '.' ) { $nalt++; last; }
            }
            if ( $nalt>1 ) { last; }
        }
        if ( $nalt==1 ) { $$RECORD[7] = $VCF->add_info_field($$RECORD[7],'SINGLETON'=>''); }
        return $PASS;
    },
},


# Set genotypes to unknown ("." or "./." depending on ploidy) when coverage is low (by Shane McCarthy).
{
    tag      => 'FORMAT/DP',
    name     => 'MinSampleDP',
    desc     => 'Genotypes set to . for samples with DP < 2',
    apply_to => 'all',
    test     => sub {
        my $i = 8;
        for my $dp (@$MATCH)
        {
            $i++;
            next unless ($dp<2);
            my @format = split(/:/,$$RECORD[$i]);
            $format[0] = $format[0] =~ /\// ? "./." : ".";
            $$RECORD[$i] = join(":",@format);
        }
        return $PASS;
    },
},

