Here's a solution using the Python API of pyvcf, a submodule of the
fuc package, which I wrote:
>>> from fuc import pyvcf
>>> data = {
... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'],
... 'POS': [100, 101, 102, 103, 104],
... 'ID': ['.', '.', '.', '.', '.'],
... 'REF': ['A', 'A', 'C', 'C', 'T'],
... 'ALT': ['C,T', 'T', 'G', 'G,A', 'A'],
... 'QUAL': ['.', '.', '.', '.', '.'],
... 'FILTER': ['.', '.', '.', '.', '.'],
... 'INFO': ['.', '.', '.', '.', '.'],
... 'FORMAT': ['GT', 'GT', 'GT', 'GT', 'GT'],
... 'A': ['0/2', '0/0', '0/1', './.', '0/1'],
... 'B': ['0/1', '1/1', './.', '1/2', '1/1'],
... }
>>> vf = pyvcf.VcfFrame.from_dict([], data)
>>> # With a real file, load it instead: vf = pyvcf.VcfFrame.from_file('in.vcf')
>>> vf.df
CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
0 chr1 100 . A C,T . . . GT 0/2 0/1
1 chr1 101 . A T . . . GT 0/0 1/1
2 chr1 102 . C G . . . GT 0/1 ./.
3 chr1 103 . C G,A . . . GT ./. 1/2
4 chr1 104 . T A . . . GT 0/1 1/1
>>> # Remove multiallelic variants
>>> vf = vf.filter_multialt()
>>> vf.df
CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
0 chr1 101 . A T . . . GT 0/0 1/1
1 chr1 102 . C G . . . GT 0/1 ./.
2 chr1 104 . T A . . . GT 0/1 1/1
>>> # Select only variants where the sample B is homozygous
>>> def one_row(r):
... return r['B'].split('/')[0] == r['B'].split('/')[1]
...
>>> i = vf.df.apply(one_row, axis=1)
>>> vf.df = vf.df[i]
>>> vf.df
CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
0 chr1 101 . A T . . . GT 0/0 1/1
1 chr1 102 . C G . . . GT 0/1 ./.
2 chr1 104 . T A . . . GT 0/1 1/1
>>> vf.to_file('out.vcf')