본문 바로가기
Python

python VCF file 정보 추출 (chromosome) 하고 plot 만들기

by 코딩하는 미토콘드리아 bioinformatics 2024. 4. 16.
반응형

python VCF file 정보 추출 (chromosome) 하고 plot 만들기

 

from collections import Counter

import matplotlib.pyplot as plt

def extract_chromosome(vcf_file):
    """Return the first-column (CHROM) value of every data line in a VCF file.

    Args:
        vcf_file: Path to a plain-text, tab-delimited VCF file.

    Returns:
        A list with one chromosome name per data line, in file order.
        Duplicates are kept so that callers can count occurrences.

    Raises:
        OSError: If the file cannot be opened.
    """
    chromosomes = []
    # VCF text is specified as UTF-8; being explicit avoids depending on the
    # platform's default encoding. Only the first column is read, so bytes
    # that fail to decode elsewhere on a line are replaced instead of
    # aborting the whole scan.
    with open(vcf_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            # Skip header lines
            if line.startswith('#'):
                continue
            record = line.strip()
            # Blank lines (typically a trailing newline at the end of the
            # file) hold no record; without this check they would be
            # reported as a chromosome named ''.
            if not record:
                continue
            # Only the text before the first tab is needed, so there is no
            # reason to split every column.
            chromosomes.append(record.partition('\t')[0])
    return chromosomes

# Function to create a bar plot of chromosome frequencies
def plot_chromosome_frequencies(chromosomes):
    """Display a bar chart of how many times each chromosome name occurs.

    Bars are ordered from the most to the least frequent chromosome. Names
    with equal counts keep the order in which they first appear in
    ``chromosomes``, so the chart is the same on every run.

    Args:
        chromosomes: Iterable of chromosome names, one entry per record.

    Raises:
        ValueError: If ``chromosomes`` is empty, since there is nothing to plot.
    """
    # Counter tallies everything in a single pass; calling list.count() once
    # per distinct name rescans the whole list each time.
    chromosome_counts = Counter(chromosomes)
    if not chromosome_counts:
        # Unpacking zip(*[]) below would otherwise fail with an unhelpful
        # "not enough values to unpack" ValueError.
        raise ValueError('No chromosomes to plot: the input is empty.')

    # most_common() sorts by descending count using a stable sort.
    sorted_chromosomes = chromosome_counts.most_common()
    chromosome_names, counts = zip(*sorted_chromosomes)

    # Create bar plot
    plt.figure(figsize=(10, 6))
    plt.bar(chromosome_names, counts, color='skyblue')
    plt.xlabel('Chromosome')
    plt.ylabel('Frequency')
    plt.title('Chromosome Frequencies in VCF File')
    # Rotated, right-aligned tick labels keep long contig names readable.
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

# Example usage: read 'sample.vcf' from the working directory and show the
# per-chromosome record counts as a bar chart.
if __name__ == "__main__":
    vcf_file = 'sample.vcf'
    plot_chromosome_frequencies(extract_chromosome(vcf_file))
반응형