easiest implementation of suffix array

nitin1 0 Tallied Votes 611 Views Share
#include<iostream>
#include<algorithm>
using namespace std;
#define MAX 10000000
#include<stdlib.h>
#include<string.h>

char s[MAX];
int p[MAX];
int bucket[MAX],nbucket[MAX];

struct suffix
{
       int idx;

}pos[MAX];

int H=0;


bool cmp(struct suffix i,struct suffix j)
{
    if(H==0)
    {
         return s[i.idx]<s[j.idx];    
    }
    else
    {

          if(bucket[i.idx]==bucket[j.idx])
          {
              return bucket[i.idx+H]<bucket[j.idx+H];                            
          }
          else
          {
               return bucket[i.idx]<bucket[j.idx];
          }
    }

}

bool cmp1(struct suffix *i,struct suffix *j)
{
     if(H==0)
     {
           if(s[i->idx]==s[j->idx])
           return true;
           else
           return false;  
     }else{

     if(bucket[i->idx]==bucket[j->idx] && bucket[i->idx+H]==bucket[j->idx+H])
     return true;
     else
     return false;
     }
}

int update(int l)
{
     int id=0,start=0,c=0;

       for(int i=0;i<l;i++)
       {
             if(i>0 && !cmp1(&pos[i],&pos[i-1]))  
             {
                    id++;
                    start=i;
             }

             if(i!=start)
             c=1;

             nbucket[pos[i].idx]=id;
       }     

       memcpy(bucket,nbucket,4*l);

       return c;
}

void suffixsort(int l)
{
     int i=0;
     int c=0;

     for(i=0;i<l;i++)
     {
        pos[i].idx=i;             
     }

     sort(pos,pos+l,cmp);
     c=update(l);

     for(H=1;c;H*=2)
     {
              sort(pos,pos+l,cmp);      

              c=update(l);
     }

     return;


}

int main()
{
    gets(s);
    int l=strlen(s)+1;

    suffixsort(l);

    for(int i=1;i<l;i++)
    printf("%d\n",pos[i].idx);

    return 0;
}
#include<iostream>
#include<algorithm>
using namespace std;
#define MAX 10000000
#include<stdlib.h>
#include<string.h>
 
char s[MAX];
int p[MAX];
int bucket[MAX],nbucket[MAX];
 
struct suffix
{
       int idx;
       
}pos[MAX];
 
int H=0;
 
 
bool cmp(struct suffix i,struct suffix j)
{
    if(H==0)
    {
         return s[i.idx]<s[j.idx];    
    }
    else
    {
        
          if(bucket[i.idx]==bucket[j.idx])
          {
              return bucket[i.idx+H]<bucket[j.idx+H];                            
          }
          else
          {
               return bucket[i.idx]<bucket[j.idx];
          }
    }
    
}
 
bool cmp1(struct suffix *i,struct suffix *j)
{
     if(H==0)
     {
           if(s[i->idx]==s[j->idx])
           return true;
           else
           return false;  
     }else{
     
     if(bucket[i->idx]==bucket[j->idx] && bucket[i->idx+H]==bucket[j->idx+H])
     return true;
     else
     return false;
     }
}
 
int update(int l)
{
     int id=0,start=0,c=0;
     
       for(int i=0;i<l;i++)
       {
             if(i>0 && !cmp1(&pos[i],&pos[i-1]))  
             {
                    id++;
                    start=i;
             }
             
             if(i!=start)
             c=1;
             
             nbucket[pos[i].idx]=id;
       }     
       
       memcpy(bucket,nbucket,4*l);
       
       return c;
}
 
void suffixsort(int l)
{
     int i=0;
     int c=0;
     
     for(i=0;i<l;i++)
     {
        pos[i].idx=i;             
     }
     
     sort(pos,pos+l,cmp);
     c=update(l);
     
     for(H=1;c;H*=2)
     {
              sort(pos,pos+l,cmp);      
              
              c=update(l);
     }
     
     return;
     
     
}
 
int main()
{
    gets(s);
    int l=strlen(s)+1;
    
    suffixsort(l);
    
    for(int i=1;i<l;i++)
    printf("%d\n",pos[i].idx);
    
    return 0;
}
ahmedhamdy 0 Light Poster

what is mean suffix array ?

is this code is work?

nitin1 15 Master Poster

suffix array is ultimate data structure for string problems. yes! this code is working fully and 101% correctly. (i can say this with full certainity). for searching the string suffixes, for sorting the suffixes of atring, for finding the frequeny of a substring in a string and many others. try to search and you will see the magic of suffix array. if you get what it's all about, you will find this code as the easiest one for suffix array. believe me, i have read about 50 links including articles, pages, wiki, topcoder , spoj forumns. and build this code. codes which i found were very tuff to understand. according to my level and as a beginner in this, this is simplest code sone can have for suffix array. thanks.

nitin1 15 Master Poster

@gonbe okay thanks for judging my code. i want you to just have a jist of this code and algo of suffix array and i want you to give a "poor" free code and with full description of code and with "C" as the language. thanks. and i wonder if you can do it. :-)

deceptikon 1,790 Code Sniper Team Colleague Featured Poster

and i wonder if you can do it. :-)

For a "poor man's" suffix array, I can do it easily:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static size_t *generate_suffix_array(const char *src)
{
    size_t len = strlen(src);
    size_t *sa = (size_t*)malloc(len * sizeof *sa);

    if (sa) {
        /* Generate unsorted suffix indexes */
        for (size_t i = 0; i < len; ++i)
            sa[i] = i;

        /*
            Note: Bubble sort is used for simplicity. If the string is expected
            to be larger than a few hundred characters it would be a good idea
            to replace this algorithm with something faster.  -deceptikon
        */
        /* Sort the suffix array */
        for (size_t i = 0; i < len; ++i) {
            for (size_t j = len - 1; j > 0; --j) {
                /* Compare in terms of the substring at the suffix index */
                if (strcmp(&src[sa[j]], &src[sa[j - 1]]) < 0) {
                    /* Swap in terms of the index */
                    size_t temp = sa[j];
                    sa[j] = sa[j - 1];
                    sa[j - 1] = temp;
                }
            }
        }
    }

    return sa;
}

int main(void)
{
    const char *src = "abracadabra";
    size_t *sa = generate_suffix_array(src);

    if (!sa) {
        fputs("Error generating suffix array\n", stderr);
        return EXIT_FAILURE;
    }

    fputs("Text:\t", stdout);

    for (size_t i = 0; src[i]; ++i)
        printf("%3c", src[i]);

    fputs("\nIndex\t", stdout);

    for (size_t i = 0; src[i]; ++i)
        printf("%3d", i);

    puts("\n\nPre-sorting\nIndex\tSuffix");

    for (size_t i = 0; src[i]; ++i)
        printf("%d:\t'%s'\n", i, &src[i]);

    puts("\n\nProst-sorting\nIndex\tSuffix");

    for (size_t i = 0; src[i]; ++i)
        printf("%d:\t'%s'\n", sa[i], &src[sa[i]]);

    free(sa);

    return EXIT_SUCCESS;
}

If you want to improve the complexity of the algorithm to the theoretical ideal of Theta(n) then things become more complex. But since you posted an "easiest" implementation, I'm assuming you didn't mean to write one of the more advanced variations.

Gonbe 32 Newbie Poster

and i wonder if you can do it. :-)

I have little doubt that I could, but I don't think I will just for the sake of proving you wrong. I've removed the downvote though even though the point made stands; christmas magic, I guess.

nitin1 15 Master Poster

no sir, thanks for this code. but i have implemented O(nlogn) approach. using bubble sort is nnlogn i suppose. main part is how we sort it so as get the bext complexity possible. for all contests, this approach is much. using bubble sort will give time limit exceeded. i have already implemneted it before you give this post. thanks sir :-)

deceptikon 1,790 Code Sniper Team Colleague Featured Poster

main part is how we sort it so as get the bext complexity possible.

You still don't have the best complexity possible. I find it somewhat amusing that you describe your solution as "easiest" and "poor", then complain about the performance of simpler solutions after practically demanding that someone else prove they can solve the same problem. :rolleyes:

for all contests, this approach is much.

Contests require an unreasonable focus on performance. If you want responses that fit into that niche area then make it very clear that your goal is to win contents and not solve a realistic problem.

using bubble sort will give time limit exceeded

Clearly you didn't read the comments that I so carefully included in the code.

nitin1 commented: you are awesome, you are strict and you also teach well. :-D thanks a lot to you +2
nitin1 15 Master Poster

yeah!! O(n) is the best complexity for this concept. okay! sir, according to you, contests don't improve a person ? i am not arguing, even i am asking you because may be i am wasting my time by attending contests. shouldn't i focus on perfomance ? till which level contests helps one to become a professional coder like you ? (not coder, i would love to say SOFTWARE DEVELOPER ). THANKS.

deceptikon 1,790 Code Sniper Team Colleague Featured Poster

according to you, contests don't improve a person ?

I didn't say that. But contests do require a unique skillset that isn't directly applicable to the real world, in my opinion. You can derive several skills for real world projects from the skills learned doing contests, but if you treat real projects like a contest I can guarantee you'll find yourself fired in short order. Conversely, the practical techniques used in the real world will consistently fail to do well in any kind of contest.

And more specifically, because I'm sure you'll ask the same question again: my immediate supervisor was a regular winner of programming contests in his college years, and he definitely thinks they were a valuable experience. So yes, doing contests is good for you, but don't trick yourself into thinking that just doing contests will make you a good professional programmer.

nitin1 commented: awesome explaination! can't get this wording from anybody who is in contact of me :-) +2
nitin1 15 Master Poster

yeah! i was going to ask that which you have already answered. :p yes! it admit which u said truly. no offense! but sir, tell me that at this level and the experience which i have , what can i do so that i develop skills which is needed in a PROFESSIONAL programmer ?

P.S i was thinking that i will be professional when i will join any big company like amazon, google or somthing like that. was i right in my thinking ? thanks

deceptikon 1,790 Code Sniper Team Colleague Featured Poster

at this level and the experience which i have , what can i do so that i develop skills which is needed in a PROFESSIONAL programmer ?

You could probably get in on some open source projects.

P.S i was thinking that i will be professional when i will join any big company like amazon, google or somthing like that. was i right in my thinking ?

You're a professional when you get paid to program. I'd further refine it into being a professional if you make a living off of programming, but that's just my opinion.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.