28 February 1926
  • STEP1: download GenomeAsia100K panel data
    # GenomeAsia100K data are available at the links below. Users can download the GA100K data as compressed Variant Call Format (VCF) files, one per chromosome.
    # Download the per-chromosome GenomeAsia100K VCFs (chromosomes 1-22) into
    # ~/hpc/db/GenomeAsia100K, one background wget per chromosome.
    ga100k_dir=~/hpc/db/GenomeAsia100K
    ga100k_url="https://browser.genomeasia100k.org/service/web/download_files"
    # -p creates missing parent directories and does not fail on a re-run.
    mkdir -p "$ga100k_dir"
    # Only start downloading once we are really inside the target directory;
    # otherwise the files would land wherever the shell happens to be.
    if cd "$ga100k_dir"; then
      for i in {1..22}
      do
        # NOTE(review): --no-check-certificate disables TLS certificate
        # verification; kept from the original command, drop it if the
        # server certificate validates on your system.
        wget --no-check-certificate "$ga100k_url/$i.substitutions.annot.cont_withmaf.vcf.gz" &
      done
      # Block until every background download has finished so that the next
      # step never reads a partially written file.
      wait
    else
      echo "cannot enter $ga100k_dir; nothing was downloaded" >&2
    fi
    
  • STEP2: filter EAS and SAS common SNPs: INFO/AF_SAS>0.01 & INFO/AF_SEA>0.01
    # Write one PBS job script per chromosome ($i.job) in the working directory
    # and submit it with qsub. Each job filters, sorts, de-duplicates and
    # restricts the GenomeAsia100K VCF of that chromosome to biallelic SNPs.
    workdir=~/hpc/rheumatology/RA/meta3000/MIR
    ga100k_dir=~/hpc/db/GenomeAsia100K
    panel="hsa.gff3"
    # Enter the working directory and make sure the PBS log directory exists
    # (-p: no error on a re-run) before generating anything; without this
    # check the job files would be written to the wrong place.
    if cd "$workdir" && mkdir -p temp; then
      # NOTE(review): the download step only fetches chromosomes 1-22; jobs
      # for X and Y need input files obtained some other way - confirm.
      for i in {1..22} X Y
      do
        prefix="$i.substitutions.annot.cont_withmaf"
        {
          # PBS directives: job name, resources, mail address, log locations.
          printf '%s\n' "#PBS -N $i"
          printf '%s\n' "#PBS -l nodes=1:ppn=1"
          printf '%s\n' "#PBS -M Guo.shicheng@marshfieldresearch.org"
          printf '%s\n' "#PBS -o $PWD/temp/"
          printf '%s\n' "#PBS -e $PWD/temp/"
          # %q shell-quotes the path so the job still works if it has spaces.
          printf 'cd %q\n' "$PWD"
          # The next two commands are written as comments into the job, i.e.
          # they are disabled; the later steps read the *.norm.vcf.gz they
          # would produce, so that file must already exist.
          printf '%s\n' "# bcftools norm -m + $ga100k_dir/$prefix.vcf.gz -Oz -o $prefix.norm.vcf.gz"
          printf '%s\n' "# tabix -p vcf $prefix.norm.vcf.gz"
          # Keep PASS SNPs with AF_SAS > 0.01 and AF_SEA > 0.01 inside the
          # regions listed in $panel.hg19.bed.
          # NOTE(review): the "gnomad.genomes.r2.1" output name looks copied
          # from another pipeline; kept unchanged in case later steps use it.
          printf '%s\n' "bcftools view -v snps -f PASS -i 'INFO/AF_SAS>0.01 &INFO/AF_SEA>0.01' -T $panel.hg19.bed $prefix.norm.vcf.gz -Oz -o gnomad.genomes.r2.1.sites.chr$i.rec.$panel.vcf.gz"
          printf '%s\n' "bcftools sort $prefix.norm.vcf.gz -Oz -o $prefix.norm.sort.vcf.gz"
          # -d/--rm-dup requires a TYPE argument; without it bcftools takes
          # the input file name as the type and the step fails. "both" is
          # the type chosen here - confirm it is the one intended.
          printf '%s\n' "bcftools norm -d both $prefix.norm.sort.vcf.gz -Oz -o $prefix.sort.vcf.gz"
          # Final output: biallelic SNPs only (-m2 -M2 -v snps).
          printf '%s\n' "bcftools view -m2 -M2 -v snps $prefix.sort.vcf.gz -Oz -o $prefix.$panel.sort.vcf.gz"
        } > "$i.job"
        qsub "$i.job"
      done
    else
      echo "cannot prepare $workdir (cd or mkdir temp failed); no jobs submitted" >&2
    fi
    


blog comments powered by Disqus