- STEP1: download GenomeAsia100K panel data
# GenomeAsia100K data are available at the links below. Users can download the
# GA100K data as compressed Variant Call Format (VCF) files, one per autosome.
ga100k_dir="$HOME/hpc/db/GenomeAsia100K"
ga100k_url="https://browser.genomeasia100k.org/service/web/download_files"

# -p keeps re-runs from failing when the directory already exists.
mkdir -p "$ga100k_dir"
# Abort rather than download 22 files into whatever directory we happen to be in.
cd "$ga100k_dir" || { echo "ERROR: cannot cd to $ga100k_dir" >&2; exit 1; }

# Start all 22 downloads in parallel, remembering each PID by chromosome.
declare -a pids=()
for i in {1..22}; do
  # NOTE(review): --no-check-certificate disables TLS certificate verification.
  # It is kept from the original command; drop it if the server certificate
  # validates in your environment.
  wget --no-check-certificate \
    "$ga100k_url/$i.substitutions.annot.cont_withmaf.vcf.gz" &
  pids[i]=$!
done

# Wait for every download so later steps never read a partially written file,
# and report any chromosome whose download failed.
for i in "${!pids[@]}"; do
  wait "${pids[i]}" || echo "ERROR: download failed for chromosome $i" >&2
done
- STEP2: filter SNPs that are common in both the SAS and SEA populations: INFO/AF_SAS>0.01 & INFO/AF_SEA>0.01
# Write one PBS job script per chromosome (<chr>.job) in the working directory
# and submit it with qsub. Each job runs the bcftools steps below on
# <chr>.substitutions.annot.cont_withmaf.norm.vcf.gz, which must already exist
# in the working directory (the norm/tabix commands that would create it are
# emitted only as comments in the job script).
work_dir="$HOME/hpc/rheumatology/RA/meta3000/MIR"
ga100k_dir="$HOME/hpc/db/GenomeAsia100K"
panel="hsa.gff3"

# Stop early: otherwise job files would be written to, and jobs submitted
# from, the wrong directory.
cd "$work_dir" || { echo "ERROR: cannot cd to $work_dir" >&2; exit 1; }
# temp/ receives the PBS stdout/stderr files; -p makes re-runs safe.
mkdir -p temp

# NOTE(review): the download step in this file fetches chromosomes 1-22 only;
# confirm that X and Y inputs exist before relying on those two jobs.
for i in {1..22} X Y; do
  stem="$i.substitutions.annot.cont_withmaf"

  # Unquoted here-document delimiter: $i, $stem, $panel, $PWD and $ga100k_dir
  # are expanded now, while generating the job file, not when the job runs.
  # Job steps, in order:
  #   1. keep PASS SNPs with AF_SAS>0.01 and AF_SEA>0.01 inside $panel.hg19.bed
  #      NOTE(review): this output is named gnomad.genomes.r2.1.sites.* although
  #      the input is GenomeAsia100K, and no later step in the job reads it;
  #      steps 2-4 start again from the unfiltered .norm.vcf.gz. Name kept as-is.
  #   2. sort the normalized VCF
  #   3. remove duplicate records. The original passed no mode to -d, so
  #      bcftools took the input filename as the -d argument; "both" (SNPs and
  #      indels) is supplied here - confirm this is the intended mode.
  #   4. keep biallelic SNPs only
  cat > "$i.job" <<EOF
#PBS -N $i
#PBS -l nodes=1:ppn=1
#PBS -M Guo.shicheng@marshfieldresearch.org
#PBS -o $PWD/temp/
#PBS -e $PWD/temp/
cd "$PWD" || exit 1
# bcftools norm -m + $ga100k_dir/$stem.vcf.gz -Oz -o $stem.norm.vcf.gz
# tabix -p vcf $stem.norm.vcf.gz
bcftools view -v snps -f PASS -i 'INFO/AF_SAS>0.01 &INFO/AF_SEA>0.01' -T $panel.hg19.bed $stem.norm.vcf.gz -Oz -o gnomad.genomes.r2.1.sites.chr$i.rec.$panel.vcf.gz
bcftools sort $stem.norm.vcf.gz -Oz -o $stem.norm.sort.vcf.gz
bcftools norm -d both $stem.norm.sort.vcf.gz -Oz -o $stem.sort.vcf.gz
bcftools view -m2 -M2 -v snps $stem.sort.vcf.gz -Oz -o $stem.$panel.sort.vcf.gz
EOF

  # Keep submitting the remaining chromosomes even if one submission fails.
  qsub "$i.job" || echo "ERROR: qsub failed for $i.job" >&2
done