Hi, i've a problem about load and parse a large array block on my dictionary-like app.
My app will load mytext.db to its array. App users entered specific text to txtIn, press cmdSearch and get the translated ones.
The main issue is scanning speed, load array speed and match input text to array.
I've read a libclamav source-code and found a simple but greats algorithm, but can't translated C language to VB.
Can anyone help me?

Hi,

Kindly post the algorithm here, Then people can understand better and help you you to solve the issue....


-venkatramasamy SN

static int hashtab_grow(struct hashtable *s)
{
	const size_t new_capacity = get_nearest_capacity(s->capacity);
	struct element* htable = cli_calloc(new_capacity, sizeof(*s->htable));
	size_t i,idx, used = 0;
	if(new_capacity == s->capacity || !htable)
		return CL_EMEM;

	PROFILE_GROW_START(s);
	cli_dbgmsg("hashtab.c: Warning: growing open-addressing hashtables is slow. Either allocate more storage when initializing, or use other hashtable types!\n");
	for(i=0; i < s->capacity;i++) {
		if(s->htable[i].key && s->htable[i].key != DELETED_KEY) {
			struct element* element;
			size_t tries = 1;				

			PROFILE_CALC_HASH(s);
			idx = hash(s->htable[i].key, strlen((const char*)s->htable[i].key), new_capacity);
			element = &htable[idx];

			while(element->key && tries <= new_capacity) {
				idx = (idx + tries++) % new_capacity;
				element = &htable[idx];
			}
			if(!element->key) {
				/* copy element from old hashtable to new */
				PROFILE_GROW_FOUND(s, tries);
				*element = s->htable[i];
				used++;
			}
			else {
				cli_errmsg("hashtab.c: Impossible - unable to rehash table");
				return CL_EMEM;/* this means we didn't find enough room for all elements in the new table, should never happen */ 
			}
		}
	}
	free(s->htable);
	s->htable = htable;
	s->used = used;
	s->capacity = new_capacity;
	s->maxfill = new_capacity*8/10;
	cli_dbgmsg("Table %p size after grow:%ld\n",(void*)s,s->capacity);
	PROFILE_GROW_DONE(s);
	return CL_SUCCESS;
}


int hashtab_insert(struct hashtable *s,const unsigned char* key,const size_t len,const element_data data)
{
	struct element* element;
	struct element* deleted_element = NULL;
	size_t tries = 1; 
	size_t idx;
	if(!s)
		return CL_ENULLARG; 
	do {
		PROFILE_CALC_HASH(s);
		idx = hash(key, len, s->capacity); 
		element = &s->htable[idx];

		do {
			if(!element->key) {
				unsigned char* thekey;
				/* element not found, place is empty, insert*/
				if(deleted_element) {
					/* reuse deleted elements*/
					element = deleted_element;
					PROFILE_DELETED_REUSE(s, tries);
				}
				else {
					PROFILE_INSERT(s, tries);
				}
				thekey = cli_malloc(len+1);
				if(!thekey)
					return CL_EMEM;
				strncpy((char*)thekey,(const char*)key,len+1);
				element->key = thekey;
				element->data = data;
				s->used++;		
				if(s->used > s->maxfill) {
					cli_dbgmsg("hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld\n",(void*)s,s->capacity);
					hashtab_grow(s);
				}
				return 0;
			}
			else if(element->key == DELETED_KEY) {
				deleted_element = element;
			}
			else if(strncmp((const char*)key,(const char*)element->key,len)==0) {
				PROFILE_DATA_UPDATE(s, tries);
				element->data = data;/* key found, update */
				return 0;		
			}
			else {
				idx = (idx + tries++) % s->capacity;
				element = &s->htable[idx];
			}
		} while (tries <= s->capacity);
		/* no free place found*/
		PROFILE_HASH_EXHAUSTED(s);
		cli_dbgmsg("hashtab.c: Growing hashtable %p, because its full, old size:%ld.\n",(void*)s,s->capacity);
	} while( hashtab_grow(s) >= 0 );
	cli_warnmsg("hashtab.c: Unable to grow hashtable\n");
	return CL_EMEM;
}

void hashtab_delete(struct hashtable *s,const unsigned char* key,const size_t len)
{
	struct element* e = hashtab_find(s,key,len);
	if(e && e->key) {	
		PROFILE_HASH_DELETE(s);
		free(e->key);/*FIXME: any way to shut up warnings here? if I make key char*, I get tons of warnings in entitylist.h */
		e->key = DELETED_KEY;
		s->used--;
	}
}

void hashtab_clear(struct hashtable *s)
{
	size_t i;
	PROFILE_HASH_CLEAR(s);
	for(i=0;i < s->capacity;i++) {
		if(s->htable[i].key && s->htable[i].key != DELETED_KEY)
			free(s->htable[i].key);/*FIXME: shut up warnings */
	}
	memset(s->htable, 0, s->capacity);
	s->used = 0;
}


int hashtab_store(const struct hashtable *s,FILE* out)
{
	size_t i;
	for(i=0; i < s->capacity; i++) {
		const struct element* e = &s->htable[i];
		if(e->key && e->key != DELETED_KEY) {
			fprintf(out,"%ld %s\n",e->data,e->key);
		}
	}
	return CL_SUCCESS;
}

int hashtab_generate_c(const struct hashtable *s,const char* name)
{
	size_t i;
	printf("/* TODO: include GPL headers */\n");
	printf("#include <hashtab.h>\n");
	printf("static struct element %s_elements[] = {\n",name);
	for(i=0; i < s->capacity; i++) {
		const struct element* e = &s->htable[i];
		if(!e->key)
			printf("\t{NULL, 0},\n");
		else if(e->key == DELETED_KEY)
			printf("\t{DELETED_KEY,0},\n");
		else
			printf("\t{(const unsigned char*)\"%s\", %ld},\n", e->key, e->data);
	}
	printf("};\n");
	printf("const struct hashtable %s = {\n",name);
	printf("\t%s_elements, %ld, %ld, %ld", name, s->capacity, s->used, s->maxfill);
	printf("\n};\n");

	PROFILE_REPORT(s);
	return 0;
}

int hashtab_load(FILE* in, struct hashtable *s)
{
	char line[1024];
	while (fgets(line, sizeof(line), in)) {
		unsigned char l[1024];
		int val;
		sscanf(line,"%d %1023s",&val,l);
		hashtab_insert(s,l,strlen((const char*)l),val);
	}
	return CL_SUCCESS;
}
This article has been dead for over six months. Start a new discussion instead.