I have tried debugging and optimizing this code, yet I cannot find the source of this error to save my life.

I am getting a 'floating point exception' when I run the code pasted below.

I am running with 'use strict' and 'use warnings'. My program takes a sequence and BLASTs it. We take the BLAST results and add them to a hash. We then take the results and BLAST them and add them to the hash.

Any help would be greatly appreciated. I cannot seem to find good documentation within perdocs, perl.org, or elsewhere as to what exactly causes a 'floating point exception'. I thought it was a mistake in using float data types; however, I never get the error on the same iteration, and I am not performing any math.

Thanks!
Stephen

sub ash_hash() {
	my $ash_hash = {};
	
	##### Open files
	open(ERR,">>$files{dir}$files{error}");			# open error file handle
	open(WARN,">>$files{dir}$files{warning}");		# open warning file handle
	
	#### First Blast Query
	print "Running seed query";
	my $blast_temp = ash_sab(\$seed{seq},\$seed{acc}); 
	
	chmod(0777,$blast_temp);											# share the file
	if(!-e $blast_temp) { die("No BLAST data returned.\n"); }			# kill program if no blast data
	
	#### Parse Blast Query
	$ash_hash->{$seed{acc}} = ash_sabParse($blast_temp);				# parse BLAST data
	unlink($blast_temp);												# destroy the file
	
	##### Build the Hash
	while($flags{count} < $flags{iterations}) {
		foreach my $branch (keys(%{$ash_hash})) {							# loop through seq w/ returned blast queries
			foreach my $leaf (keys(%{$ash_hash->{$branch}})) {			# loop through seq returned in the blast queries
				if(!(defined $ash_hash->{$leaf})) {					# skip if we already have this sequence
					if(exists($bad_sequences{$leaf})) {				# if we've already logged this seq, skip it entirely
						delete $ash_hash->{$branch}->{$leaf};
						next;
					}
					
					my $seq = acc2seq($leaf);							# retrieve the sequence REFERENCE!!!
					if($seq <= 0) {								# no string data -- no seq
						if($seq == -1) { 								# not a protein sequence
							print ERR "Nucleotide sequence: $leaf.\n"; }
						elsif($seq == 0) {								# unable to get a sequence returned
							print ERR "Bad sequence: $leaf.\n"; }
						else {
							print ERR "Unknown value returned: $leaf.\n"; }
						$bad_sequences{$leaf} = 1;
						delete $ash_hash->{$branch}->{$leaf};
						next;
					}
					
					##### output iteration count ON THE SAME LINE
					#$| = 1;
					print "\rRunning iteration $flags{count}:\t$leaf          ";
					#$| = 0;
					my $blast_temp = ash_sab($seq,\$leaf);				# run BLAST
					chmod(0777,$blast_temp);							# share the file
					if(!-e $blast_temp) {
						die("\n\nNo BLAST data returned.\n"); }			# kill program if no blast data
					if(!($ash_hash->{$leaf} = ash_sabParse($blast_temp))) {	# add results to hash
						print ERR "No BLAST sequences returned: $leaf.\n";
						delete $ash_hash->{$leaf};
						$bad_sequences{$leaf} = 1;
						next;
					}
					if($flags{save_everything}) {						# move the file to the blast folder or
						move($blast_temp,$files{dir}."BLAST/".$leaf.".blast");
					} else { unlink($blast_temp); }					# destroy the file
					$flags{count}++;
				}
			}
		}
	}
...
}
sub ash_sab( $ $ ) {
	my $seq = shift or die("No sequence data.\n");					# read in variables
	my $acc = shift or die("No accession number.\n");
	my $out = "$files{dir}$files{temp}";
	
	my @params = (													# blast factory parameters
		'program' => $blast{program},
		'database' => $blast{db},
		'outfile' => $out,
		'expect' => $blast{evalue});
#		'method' => $blast{method});								# parameter variable for factory creation
	my $factory = Bio::Tools::Run::StandAloneBlast->new(@params);	# create handler for SAB
	my $seqobj = Bio::Seq->new(-id=> $$acc,-seq => $$seq);			# create a Seq object from seq string
	my $blast_report = $factory->blastall($seqobj);					# run BLAST on SEQ object
	return $out;
}
sub ash_sabParse($) {
	my $in = shift;		# get filename
	my $scores = ();		# hash ref to hold chosen scores
	my $count = 0;			# number of results
	my $acc = 0; my $bs = 0; my $ev = 0;	# values pulled from the blast file
	
	open(BF,$in);			# open the blast file
	my @blast_file = <BF>;	# read in entire file
	close(BF);				# close the file handle
	undef $in;
	
	splice(@blast_file,0,20);					# deletes the beginning file information
	
	##### Open files
	#open(ERR,">>$files{dir}$files{error}");			# open error file handle
	#open(WARN,">>$files{dir}$files{warning}");		# open warning file handle
	
	foreach my $line (@blast_file) {	# iterate through file
		# no more data
		if($line =~ m/$>/ || $line =~ m/Matrix\:/) {
			@blast_file = {};	# clear remaining data in blast file;
		}
		
		# Getting the good data
		if($line =~ m/^>/) { last; }
		elsif($line =~ m/^(gb|dbj|emb|ref)\|([\w\d]*).*\s+([\d\.]+)\s+(\d+(e-|\.)\d*)/) {
		# This section collects the normal data
			$acc = $2;		# accession number
			$bs = $3;		# raw score
			$ev = $4;		# e-value
		} elsif($line =~ m/^([\w\d]*) hypothetical protein.*\s+([\d\.]+)\s+(\d+(e-|\.)\d*)/ && $flags{hypotheticals}) {
		# This section collects most of the hypothetical data
			$acc = $1;		# accession number
			$bs = $2;		# raw score
			$ev = $3;		# e-value
		} elsif($line =~ m/^(\w\d+).*\s+([\d\.]+)\s+(\d+(e-|\.)\d*)/) {
		# This section collects a few others
			$acc = $1;		# accession number
			$bs = $2;		# raw score
			$ev = $3;		# e-value
		} else {
			print WARN "NOT USED: $line"; 
		}
		
		# saving the good data or printing to error file
		if(!$acc && !$ev && !$bs) { next;														# not enough data collected
		} elsif($acc =~ m/XP_/i) { print WARN "NOT USED, experimental: $line";					# skip experimental data
		} elsif(exists($bad_sequences{$acc})) { print WARN "SKIPPED: noted bad seq: $acc\n";	# skip found bad sequence
		} elsif($flags{score_type} eq 'bitscore' && $bs) { 									# save raw score and accn no.
			$scores->{$acc} = $bs; 
			$count++; 
		} elsif($flags{score_type} eq 'evalue' && $ev) {										# save e-value and accn no.
			$scores->{$acc} = $ev; 
			$count++; }
		else {}
	}

	##### close files
	#close(ERR);
	#close(WARN);
	
	##### release variables
	undef $acc;
	undef $bs;
	undef $ev;
	
	$count > 0 ? return $scores : return 0;
}

Recommended Answers

All 4 Replies

You are using math, there are several places in the script that use math operators:

==
<=

maybe more. I do not know if that is where you problems are occuring. Erros/Warnings generally have a line number associated with them that can help pin point where a error/warning is occuring. Run your program and see if a line number and a program/module name gets displayed along with the message.

So...a quick update.

I do not know if that is where you problems are occuring. Erros/Warnings generally have a line number associated with them that can help pin point where a error/warning is occuring. Run your program and see if a line number and a program/module name gets displayed along with the message.

No, there are no line numbers given with the error -- only "Floating point exception".

I think it may be a memory problem as it usually happens around iterations 100-150, and it is more prone to be thrown after it has occurred once.

Also, after stepping through the debugger several times and reaching 200+ iterations without a problem, I may have found the culprit: I think it is actually NCBI's get_Stream_by_acc() function. After I finally caused the exception manually, the program failed in this function at line 9:

sub acc2seq( $ ) {
	my $count = 0; my $seq;
	my $acc = shift;								# get accession number
	while($count < 5) {
		my $gb = Bio::DB::GenBank->new(				# create new GenBank object
			-retrievaltype => 'tempfile' ,
			-format => 'Fasta');
		my $seqio;
		eval{$seqio = $gb->get_Stream_by_acc($acc)};	# contact NCBI for data
		if($@ || !$seqio) {										# handle NCBI error
			sleep 2;
			$count++;
			next;
		}
		my $seqobj = $seqio->next_seq();			# return SeqIO object
		if($seqobj->alphabet ne 'protein') {
			return -1;
		}
		$seq = $seqobj->seq();						# return the string sequence
		
		##### Error handling for no sequence returned
		if($seq =~ /(Error)/) {
			sleep 2;								# pause and keep going
			$count++;								
			next;									# reiterate and reevaluate loop
		}
		$count = 100;									
    }
    
    $count == 100 ? return \$seq : return 0;		# if good seq, return, else 0
}

Does anyone have any inkling on how to catch this since eval isn't working?

Thoughts on why this may be a problem..."WARNING: Please do NOT spam the Entrez web server with multiple requests." This is an excerpt from the module documentation. I'm not sure, but I believe this could be the cause.

I could avoid calling GeneBank every time and store the sequences myself, but this would be a very very large amount of data. Thoughts? Does anyone see any way around this?

I really don't know, sorry. Find a forum or newsgroup that specializes in bioperl and you may find someone that has experienced this problem and can help.

Good luck.

Turns out it was NCBI...I was querying the DB so often it kept crashing on me. Unfortunately, I queried slow enough during debug that it usually didn't have a problem.

Solution: Query for multiple sequences at once rather than just one.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.