0

I have tried debugging and optimizing this code, yet I cannot find the source of this error to save my life.

I am getting a 'floating point exception' when I run the code pasted below.

I am running with 'use strict' and 'use warnings'. My program takes a sequence and BLASTs it. We take the BLAST results and add them to a hash. We then take the results and BLAST them and add them to the hash.

Any help would be greatly appreciated. I cannot seem to find good documentation within perdocs, perl.org, or elsewhere as to what exactly causes a 'floating point exception'. I thought it was a mistake in using float data types; however, I never get the error on the same iteration, and I am not performing any math.

Thanks!
Stephen

sub ash_hash() {
	my $ash_hash = {};
	
	##### Open files
	open(ERR,">>$files{dir}$files{error}");			# open error file handle
	open(WARN,">>$files{dir}$files{warning}");		# open warning file handle
	
	#### First Blast Query
	print "Running seed query";
	my $blast_temp = ash_sab(\$seed{seq},\$seed{acc}); 
	
	chmod(0777,$blast_temp);											# share the file
	if(!-e $blast_temp) { die("No BLAST data returned.\n"); }			# kill program if no blast data
	
	#### Parse Blast Query
	$ash_hash->{$seed{acc}} = ash_sabParse($blast_temp);				# parse BLAST data
	unlink($blast_temp);												# destroy the file
	
	##### Build the Hash
	while($flags{count} < $flags{iterations}) {
		foreach my $branch (keys(%{$ash_hash})) {							# loop through seq w/ returned blast queries
			foreach my $leaf (keys(%{$ash_hash->{$branch}})) {			# loop through seq returned in the blast queries
				if(!(defined $ash_hash->{$leaf})) {					# skip if we already have this sequence
					if(exists($bad_sequences{$leaf})) {				# if we've already logged this seq, skip it entirely
						delete $ash_hash->{$branch}->{$leaf};
						next;
					}
					
					my $seq = acc2seq($leaf);							# retrieve the sequence REFERENCE!!!
					if($seq <= 0) {								# no string data -- no seq
						if($seq == -1) { 								# not a protein sequence
							print ERR "Nucleotide sequence: $leaf.\n"; }
						elsif($seq == 0) {								# unable to get a sequence returned
							print ERR "Bad sequence: $leaf.\n"; }
						else {
							print ERR "Unknown value returned: $leaf.\n"; }
						$bad_sequences{$leaf} = 1;
						delete $ash_hash->{$branch}->{$leaf};
						next;
					}
					
					##### output iteration count ON THE SAME LINE
					#$| = 1;
					print "\rRunning iteration $flags{count}:\t$leaf          ";
					#$| = 0;
					my $blast_temp = ash_sab($seq,\$leaf);				# run BLAST
					chmod(0777,$blast_temp);							# share the file
					if(!-e $blast_temp) {
						die("\n\nNo BLAST data returned.\n"); }			# kill program if no blast data
					if(!($ash_hash->{$leaf} = ash_sabParse($blast_temp))) {	# add results to hash
						print ERR "No BLAST sequences returned: $leaf.\n";
						delete $ash_hash->{$leaf};
						$bad_sequences{$leaf} = 1;
						next;
					}
					if($flags{save_everything}) {						# move the file to the blast folder or
						move($blast_temp,$files{dir}."BLAST/".$leaf.".blast");
					} else { unlink($blast_temp); }					# destroy the file
					$flags{count}++;
				}
			}
		}
	}
...
}
sub ash_sab( $ $ ) {
	my $seq = shift or die("No sequence data.\n");					# read in variables
	my $acc = shift or die("No accession number.\n");
	my $out = "$files{dir}$files{temp}";
	
	my @params = (													# blast factory parameters
		'program' => $blast{program},
		'database' => $blast{db},
		'outfile' => $out,
		'expect' => $blast{evalue});
#		'method' => $blast{method});								# parameter variable for factory creation
	my $factory = Bio::Tools::Run::StandAloneBlast->new(@params);	# create handler for SAB
	my $seqobj = Bio::Seq->new(-id=> $$acc,-seq => $$seq);			# create a Seq object from seq string
	my $blast_report = $factory->blastall($seqobj);					# run BLAST on SEQ object
	return $out;
}
sub ash_sabParse($) {
	my $in = shift;		# get filename
	my $scores = ();		# hash ref to hold chosen scores
	my $count = 0;			# number of results
	my $acc = 0; my $bs = 0; my $ev = 0;	# values pulled from the blast file
	
	open(BF,$in);			# open the blast file
	my @blast_file = <BF>;	# read in entire file
	close(BF);				# close the file handle
	undef $in;
	
	splice(@blast_file,0,20);					# deletes the beginning file information
	
	##### Open files
	#open(ERR,">>$files{dir}$files{error}");			# open error file handle
	#open(WARN,">>$files{dir}$files{warning}");		# open warning file handle
	
	foreach my $line (@blast_file) {	# iterate through file
		# no more data
		if($line =~ m/$>/ || $line =~ m/Matrix\:/) {
			@blast_file = {};	# clear remaining data in blast file;
		}
		
		# Getting the good data
		if($line =~ m/^>/) { last; }
		elsif($line =~ m/^(gb|dbj|emb|ref)\|([\w\d]*).*\s+([\d\.]+)\s+(\d+(e-|\.)\d*)/) {
		# This section collects the normal data
			$acc = $2;		# accession number
			$bs = $3;		# raw score
			$ev = $4;		# e-value
		} elsif($line =~ m/^([\w\d]*) hypothetical protein.*\s+([\d\.]+)\s+(\d+(e-|\.)\d*)/ && $flags{hypotheticals}) {
		# This section collects most of the hypothetical data
			$acc = $1;		# accession number
			$bs = $2;		# raw score
			$ev = $3;		# e-value
		} elsif($line =~ m/^(\w\d+).*\s+([\d\.]+)\s+(\d+(e-|\.)\d*)/) {
		# This section collects a few others
			$acc = $1;		# accession number
			$bs = $2;		# raw score
			$ev = $3;		# e-value
		} else {
			print WARN "NOT USED: $line"; 
		}
		
		# saving the good data or printing to error file
		if(!$acc && !$ev && !$bs) { next;														# not enough data collected
		} elsif($acc =~ m/XP_/i) { print WARN "NOT USED, experimental: $line";					# skip experimental data
		} elsif(exists($bad_sequences{$acc})) { print WARN "SKIPPED: noted bad seq: $acc\n";	# skip found bad sequence
		} elsif($flags{score_type} eq 'bitscore' && $bs) { 									# save raw score and accn no.
			$scores->{$acc} = $bs; 
			$count++; 
		} elsif($flags{score_type} eq 'evalue' && $ev) {										# save e-value and accn no.
			$scores->{$acc} = $ev; 
			$count++; }
		else {}
	}

	##### close files
	#close(ERR);
	#close(WARN);
	
	##### release variables
	undef $acc;
	undef $bs;
	undef $ev;
	
	$count > 0 ? return $scores : return 0;
}
2
Contributors
4
Replies
5
Views
9 Years
Discussion Span
Last Post by muppetjones
0

You are using math, there are several places in the script that use math operators:

==
<=

maybe more. I do not know if that is where you problems are occuring. Erros/Warnings generally have a line number associated with them that can help pin point where a error/warning is occuring. Run your program and see if a line number and a program/module name gets displayed along with the message.

0

So...a quick update.

I do not know if that is where you problems are occuring. Erros/Warnings generally have a line number associated with them that can help pin point where a error/warning is occuring. Run your program and see if a line number and a program/module name gets displayed along with the message.

No, there are no line numbers given with the error -- only "Floating point exception".

I think it may be a memory problem as it usually happens around iterations 100-150, and it is more prone to be thrown after it has occurred once.

Also, after stepping through the debugger several times and reaching 200+ iterations without a problem, I may have found the culprit: I think it is actually NCBI's get_Stream_by_acc() function. After I finally caused the exception manually, the program failed in this function at line 9:

sub acc2seq( $ ) {
	my $count = 0; my $seq;
	my $acc = shift;								# get accession number
	while($count < 5) {
		my $gb = Bio::DB::GenBank->new(				# create new GenBank object
			-retrievaltype => 'tempfile' ,
			-format => 'Fasta');
		my $seqio;
		eval{$seqio = $gb->get_Stream_by_acc($acc)};	# contact NCBI for data
		if($@ || !$seqio) {										# handle NCBI error
			sleep 2;
			$count++;
			next;
		}
		my $seqobj = $seqio->next_seq();			# return SeqIO object
		if($seqobj->alphabet ne 'protein') {
			return -1;
		}
		$seq = $seqobj->seq();						# return the string sequence
		
		##### Error handling for no sequence returned
		if($seq =~ /(Error)/) {
			sleep 2;								# pause and keep going
			$count++;								
			next;									# reiterate and reevaluate loop
		}
		$count = 100;									
    }
    
    $count == 100 ? return \$seq : return 0;		# if good seq, return, else 0
}

Does anyone have any inkling on how to catch this since eval isn't working?

Thoughts on why this may be a problem..."WARNING: Please do NOT spam the Entrez web server with multiple requests." This is an excerpt from the module documentation. I'm not sure, but I believe this could be the cause.

I could avoid calling GeneBank every time and store the sequences myself, but this would be a very very large amount of data. Thoughts? Does anyone see any way around this?

0

I really don't know, sorry. Find a forum or newsgroup that specializes in bioperl and you may find someone that has experienced this problem and can help.

Good luck.

0

Turns out it was NCBI...I was querying the DB so often it kept crashing on me. Unfortunately, I queried slow enough during debug that it usually didn't have a problem.

Solution: Query for multiple sequences at once rather than just one.

This question has already been answered. Start a new discussion instead.
Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.