Hello, I have a Perl program in which I must mutate a DNA sequence while maintaining the same base-pair distribution, then randomly mutate the sequence 10 to 20 times, and finally measure the similarity between the mutated and original sequences by calculating an ad hoc score. This is what I have so far:

use strict;
use warnings;
# The DNA sequence to mutate. The literal is laid out in groups separated by
# spaces and line breaks purely for readability.
my $sequence='A G G G C A C C T C T C A G T T C T C A T T C T A A C A C C A C
A T A A T T T T T A T T T G T A T T A T T C A G A T T T T T C A T G A A C T T T
T C C A C A T A G A A T G A A G T T G A C A T T G T T A T T T C T C A G G G T C
T C G G T T C A C C A G T A T T T G A C A A A C T T G A A G C T G A A C T A G C
T A A A G C T G C T A T G T C A T T G C C T G C A A C C A A G G G C T T T C A G
T T T G G T A G T G G G T T T G C A G G C A C C T T T T T G A C T G G G A G T G
A A C A C A A T G A T G A G T T C T A T A T A G A T G A A C A T G G A A A C A C
A A G A A C A A G A A C A A A T C G C T C T G G T G G G A T A C A G G G T G G A
A T T T C C A A T G G G G A A A T C A T T A A T A T G A G A A T A G C T T T C A
A G C C A A C A T C A A C A A T T G G A A A G A A G C A A A A T A C T G T G A C
T C G A G A T A A A A A A G A A A C A G A G T T T A T A G C C C G T G G T C G C';

# Delete every character that is not a base (the spaces and line breaks of
# the layout above). Otherwise a randomly chosen position can land on
# whitespace and the "mutation" overwrites a layout character with a base.
$sequence =~ tr/ACGT//cd;

# time|$$ combines the current time with the current process id
srand(time|$$);

my $mutant = mutate_DNA($sequence);
print "\nMutate DNA\n\n";

print "\nHere is the original DNA sequence:\n\n";
print "$sequence\n";

print "\nHere is the mutated DNA:\n\n";
print "$mutant\n";

exit;

# Return a copy of $dna in which one randomly chosen position has been
# replaced by a different nucleotide.
#
# Arguments: $dna - the sequence to mutate; the caller's string is not
#                   modified because the sub works on its own copy.
# Returns:   the mutated copy. An empty sequence is returned unchanged.
sub mutate_DNA{
    my ($dna) = @_;

    # An empty sequence has no base to replace, so hand it back untouched.
    return $dna unless length $dna;

    # Pick a random position in the DNA
    my $position = randomposition($dna);
    my $oldbase  = substr($dna, $position, 1);

    # Pick a random nucleotide, drawing again whenever it matches the base
    # already at that position; otherwise some calls would change nothing.
    my $newbase;
    do {
        $newbase = randomnucleotide();
    } while ($newbase eq $oldbase);

    substr($dna, $position, 1, $newbase);
    return $dna;
}

# Return one element picked at random from the argument list.
sub randomelement{
    my @choices = @_;
    my $index   = int(rand(scalar @choices));
    return $choices[$index];
}

# Return one of the four DNA bases A, C, G or T, chosen at random.
# Any arguments passed in are ignored.
sub randomnucleotide{
    my $index = int(rand(4));
    return substr('ACGT', $index, 1);
}
# Return a random zero-based offset into $string: an integer from 0 to
# length($string) - 1 (0 for an empty string).
sub randomposition{
    my $string = shift;
    my $length = length $string;
    return int(rand($length));
}

Recommended Answers

All 4 Replies

#!/usr/bin/perl
#score_many_mutant.pl
use strict;
use warnings;

# The DNA sequence to mutate. The literal is laid out in groups separated by
# spaces and line breaks purely for readability.
my $sequence='A G G G C A C C T C T C A G T T C T C A T T C T A A C A C C A C
A T A A T T T T T A T T T G T A T T A T T C A G A T T T T T C A T G A A C T T T
T C C A C A T A G A A T G A A G T T G A C A T T G T T A T T T C T C A G G G T C
T C G G T T C A C C A G T A T T T G A C A A A C T T G A A G C T G A A C T A G C
T A A A G C T G C T A T G T C A T T G C C T G C A A C C A A G G G C T T T C A G
T T T G G T A G T G G G T T T G C A G G C A C C T T T T T G A C T G G G A G T G
A A C A C A A T G A T G A G T T C T A T A T A G A T G A A C A T G G A A A C A C
A A G A A C A A G A A C A A A T C G C T C T G G T G G G A T A C A G G G T G G A
A T T T C C A A T G G G G A A A T C A T T A A T A T G A G A A T A G C T T T C A
A G C C A A C A T C A A C A A T T G G A A A G A A G C A A A A T A C T G T G A C
T C G A G A T A A A A A A G A A A C A G A G T T T A T A G C C C G T G G T C G C';
$sequence = uc($sequence); #Make sure all letters are upper case

#We have to remove spaces and line-separators, new-lines, etc. from sequence
$sequence =~ s/[^AGCT]//g; #Remove all characters that are not A, G, C, or T
my $mutant = $sequence; #Initialize as copy of above string

# time|$$ combines the current time with the current process id
srand(time|$$);

#randomly mutate the sequence 10 to 20 times
#Create integer between 10 and 20
my $random_nbr_of_reps = 10 + int(rand(11));

for my $rep (1 .. $random_nbr_of_reps) {
    $mutant = mutate_DNA($mutant);
}

print "\nMutate DNA\n\n";

print "\nHere is the original DNA sequence:\n\n";
print "$sequence\n";

print "\nHere is the mutated DNA:\n\n";
print "$mutant\n";

#Score every position and add the per-position scores up, so that the
#comparison also yields one overall similarity figure.
print "\nHere are the scores:\n\n";
print "Sequence,Mutant,Score\n";
my $total_score = 0;
for my $position (0 .. length($sequence) - 1) {
    my $s = substr($sequence, $position, 1);
    my $m = substr($mutant, $position, 1);
    my $score = determine_score($s, $m);
    $total_score += $score;
    print "$s,$m,$score\n";
}
print "\nTotal score: $total_score\n";
exit;

# Return a copy of $dna in which one randomly chosen position has been
# replaced by a different nucleotide.
#
# Arguments: $dna - the sequence to mutate; the caller's string is not
#                   modified because the sub works on its own copy.
# Returns:   the mutated copy. An empty sequence is returned unchanged.
sub mutate_DNA{
    my ($dna) = @_;

    # An empty sequence has no base to replace, so hand it back untouched.
    return $dna unless length $dna;

    # Pick a random position in the DNA
    my $position = randomposition($dna);
    my $oldbase  = substr($dna, $position, 1);

    # Pick a random nucleotide, drawing again whenever it matches the base
    # already at that position; otherwise some calls would change nothing
    # and the number of real mutations would be lower than requested.
    my $newbase;
    do {
        $newbase = randomnucleotide();
    } while ($newbase eq $oldbase);

    substr($dna, $position, 1, $newbase);
    return $dna;
}

# Only subroutine definitions belong below this point: the main program
# above finishes with "exit", so top-level statements placed here never run.

# Return one element picked at random from the argument list.
sub randomelement{
    my @choices = @_;
    my $index   = int(rand(scalar @choices));
    return $choices[$index];
}

# Return one of the four DNA bases A, C, G or T, chosen at random.
# Any arguments passed in are ignored.
sub randomnucleotide{
    my @bases = qw(A C G T);
    my $index = int(rand(scalar @bases));
    return $bases[$index];
}

# Return a random zero-based offset into $string: an integer from 0 to
# length($string) - 1 (0 for an empty string).
sub randomposition{
    my $string = shift;
    my $length = length $string;
    return int(rand($length));
}

# Score one pair of bases taken from the same position of the original and
# the mutated sequence.
#
# Arguments: two single-letter bases (A, C, G or T), in either order.
# Returns:    0 when the two bases are the same,
#            +1 when both are purines (A, G) or both are pyrimidines (C, T),
#            -1 when one is a purine and the other a pyrimidine,
#            undef when the bases differ and either is not A, C, G or T.
sub determine_score{
    #Put the two bases in alphabetical order
    my ($first, $second) = sort @_;

    #An unchanged base scores 0.
    return 0 if $first eq $second;

    #Chemical class of each base.
    my %class_of = (
        A => 'purine',
        G => 'purine',
        C => 'pyrimidine',
        T => 'pyrimidine',
    );

    #Anything outside the four bases has no score.
    for my $base ($first, $second) {
        return undef unless exists $class_of{$base};
    }

    #Same class (purine<->purine, pyrimidine<->pyrimidine) scores +1,
    #a change of class scores -1.
    return $class_of{$first} eq $class_of{$second} ? +1 : -1;
}

Thank you for your help and it makes sense

copy/paste error FTW!
exit early...

commented: "For The Win"? Please don't litter. +0

Thank you for your help and it makes sense

You're welcome. Please mark this thread solved.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.