This program was written and tested in unix/linux environment (in EMacs editor )with a GCC compiler.

/* This program was written using Emacs editor and tested using gcc    compiler in Unix environment for other environments and other compilers than GCC you might have to make slight modifications */

/* The author of this program is Raghu.R, currently pursuing Masters at USC */

/* ------------------------------------------Note--------------------------------*/
/* I have not followed the best of the programming practices, for eg I have used goto at a place or two, even though I could have done it using while or for, its just my fancy to use goto rather than those two, I had used goto and I have used bubble sort to sort the string, again I could have used merge or quick but since the string size is usally of smaller order I didnt use better sorting algorithms */

/* Any feedbacks and comments are welcome, I can be reached at rramaswa@usc.edu */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>

void BubbleSort(char *str, int array_size);
void Print_Stack(char *stack,int length,char *str);
void Permute(char str[],int level,int length,int index,char stack[],int cell);	
int Find_InStack(char value,char *stack,int len);
int Check_InVector(int level,int length);

//int **hash_table;
int *levels_vector;

int main(void)
{
	char *str;
	//int *hash_table;
	int length;
	int index = -1;
	
	int i = 0;
	int j = 0;
	
	char *stack;
	int spl_flag;
	
	spl_flag = 0;

	/* This is the worst part of the algo, I am assuming the string size is no more than 1000, because there is no way before hand I can know the size of the string before hand of course  I can allocate smaller size of array monitor for return key trap the return signal and reallocate but I didnt want to do all that crap :) */
	/* -----------------------------------Note---------------------------------------------*/
	/*   for sizes above 100 itself it would take ages to compute the different permutations, if you want to test for sizes for 1000, change the value in malloc and use parallel supercomputers to get instant results :) */

	str = (char *)malloc(1000 * sizeof(char));
	printf("Enter the String ..\n");
	gets(str);
	length = strlen(str);	

	str[length] = '\0';	
	
	/* sort the input string */
	BubbleSort(str,length);
	printf("Sorted String ..\n");
	puts(str);
	
	/* Start computing permutations */
	printf("\n\nPrinting permutations .. \n\n");	
	
	/* If the string is of length 1 it is a special case */
	if(length == 1)
	{
	  printf("[%d] ",1);
	  puts(str);          
	  goto B;
	}
	
	/* Allocate memory for a vector (a array initialized to zero)*/
	/* This would store the current index at each level (level in the tree) */
	levels_vector = (int *)malloc(length * sizeof(int ));
	assert(levels_vector);
	
	/* Initialisze the levels_vector */

	for(i = 0;i < length;i++)
	{
	  levels_vector[i] = i;
	}
	
	/* Call the permutation function for each level 1(ie beginning each character of the string )*/
	for(i = 0;i < length;i++)
	{
	  /* Allocate memory for stack */
	  /* Stack would contain the string permuted */
	  stack = (char *)malloc(length *sizeof(int));
               assert(stack);

	  /* level 1 assigned to various characters in the string based on iterations*/
	  levels_vector[0] = i;
	  Permute(str,0,length,index,stack,i);	
	  free(stack);
	}
	
B:	return 1;
	
}

/* This function computes the different permutations of the given string */
void Permute(char str[],int level,int length,int index,char stack[],int cell)
{
	int i = 0;	
	int j = 0;
	int k = 0;
	level++;
	//printf("  Level %d ",level);

	/* Return at level-2 the reason why I am doing this is at level 02 in the recursion tree it is obvious that the string would have to permute between the rest of remaining charaters for eg if the string in the stack is abd the remaining chracters would be c and e (assuming input string is abcde), so the obvious permutations would be abdce and abdec, I could have filled the stack going onto level = length but I thought it is obvious and would save recursion steps */

	if(level > length - 2)
	{
	  Print_Stack(stack,length,str);
	  return;
	}
    
	if(level == 1)
	{
	  /* If the level is 1 it would be a special case */
	  stack[level-1] = str[levels_vector[level-1]];              
	}
	else
	{
	    levels_vector[level-1] = Check_InVector(level,length);  
	    stack[level-1] = str[levels_vector[level-1]];  
	}
    
	/* This recursion would branch the left subtree */
	Permute(str,level,length,index,stack,cell);
	for(i = level;i < length-1;i++)	
	{	
	  /* This recursion would branch the rite subtree */
	  Permute(str,level,length,index,stack,cell);
	  //printf("\n");
	}
}   

/* This function returns the index of the character to be selected from the input string based on the current level after checking the levels_vector array */

int Check_InVector(int level,int length)
{
  int i = 0;
  int value = 0;

   /* This is a important step, for each current level other than level do the following for all the levels above it check if the current character is already set in levels_vector, if already present skip and select the next character */

  value = levels_vector[level-1];
 
 A: value = value % length;

 for(i = 0;i < level;i++)
 {
   /* If the current value is already present in the levels above it, this level cannot select the index to that character, increment and branch to label A */
   if(levels_vector[i] == value)
     {
      value++;
      goto A;
     }
 }
 return value;
}

/* This function prints the contents in the stack */
void Print_Stack(char *stack,int length,char *str)
{
    int i = 0;
    char c1,c2;
    int char_count = 0;
    static int flag = 0;
    static int count = 0;
    //printf("\n Printing the stack now ..\n");
    //printf(" %d : ",count+1);

    /*--------------------------------------------------------------------------*/
    /* This part is specifically for the part I mentioned earlier about
       skipping 2 levels of recursion, the following part of the code detects the 2 characters missing from the stack that have to be included to finish the permutation */

    for(i = 0;i < length;i++)
    {
      if(Find_InStack(str[i],stack,length-2) == 0)
      {
	if(char_count == 0)
	  {
	    c1 = str[i];             
	    char_count++;
	  }            
	else
	  {
	    c2 = str[i];    
	  }            
      }
    }
    if(flag == 0)
      {
	stack[length-2] = c1;        
	stack[length-1] = c2;
	flag++;
      }
    else
      {
	stack[length-2] = c2;        
	stack[length-1] = c1;
	flag = 0;
      }
    
    /*----------------------------------------------------------------------*/

    printf("[%d] ",count+1);
    
    /* Print the contents in the stack */
    for(i = 0;i < length;i++)
      {
	printf(" %c ",stack[i]);
      }
    count++;
    printf("\n");
}

/* This function checks if the given character is in the stack */
int Find_InStack(char value,char *stack,int len)
{
     int i = 0;
     for(i = 0;i < len;i++)
     {
      if(stack[i] == value)
        return 1;      
     }
     return 0;
}


/* This function bubble sorts the input string */
void BubbleSort(char *str, int array_size)
{
  int i, j;
  char temp;

  for (i = (array_size - 1); i >= 0; i--)
  {
    for (j = 1; j <= i; j++)
    {
      if (str[j-1] > str[j])
      {
        temp = str[j-1];
        str[j-1] = str[j];
        str[j] = temp;
      }
    }
  }
}

Here are just a few suggestions:

line 46: >> gets(str);
Never ever use gets(). What will happen if I enter more characters than the buffer can hold? Answer: Seg Fault. Use fgets() instead because it will limit the input to the max length you want.

line 49: has no value because the string is already NULL terminated.

line 64: Delete that goto statement -- there are a lot of better ways to do that. goto is considered very bad programming practice.

lines 44 and 69: delete the typecase. C language does not require a typecase for functions such as malloc() which return void *.

line 84: There is no need to allocate stack on every iteration of that loop and may fragment memory! That's just grossly inefficient. Allocate it once before the loop starts then free it once after the loop ends.