Can anyone help me with this problem?

I have two text boxes into which I load two text files.
Now, in the third text box, I need to display the word frequency (each word and how many times that word is used in those two text boxes). Finally, I need to calculate the Jaccard coefficient, which will show the similarity of the two text documents.

Please, can anyone help me with this?

Recommended Answers

All 3 Replies

public partial class Window1 : Window
    {
        /// <summary>
        /// Creates the window and runs <c>InitializeComponent</c>.
        /// </summary>
        public Window1()
        {
            this.InitializeComponent();
        }

        /// <summary>
        /// Click handler for <c>button1</c>: shows an open-file dialog filtered to
        /// <c>*.txt</c> and, when the dialog result is <c>true</c>, loads the whole
        /// file into <c>textBox2</c>.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">The routed event data (not used).</param>
        private void button1_Click(object sender, RoutedEventArgs e)
        {
            Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
            dlg.FileName = "Document";
            dlg.DefaultExt = ".txt";
            dlg.Filter = "Text documents (.txt)|*.txt";

            // Anything other than true (false or null) means no file was confirmed.
            bool? result = dlg.ShowDialog();
            if (result != true)
            {
                return;
            }

            // Dispose the reader deterministically so the file handle is released
            // as soon as the text has been read, instead of waiting for finalization.
            using (StreamReader stReader = new StreamReader(dlg.FileName))
            {
                textBox2.Text = stReader.ReadToEnd();
            }
        }

        /// <summary>
        /// Click handler for <c>button2</c>: shows an open-file dialog filtered to
        /// <c>*.txt</c> and, when the dialog result is <c>true</c>, loads the whole
        /// file into <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">The routed event data (not used).</param>
        private void button2_Click(object sender, RoutedEventArgs e)
        {
            Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
            dlg.FileName = "Document";
            dlg.DefaultExt = ".txt";
            dlg.Filter = "Text documents (.txt)|*.txt";

            // Anything other than true (false or null) means no file was confirmed.
            bool? result = dlg.ShowDialog();
            if (result != true)
            {
                return;
            }

            // Dispose the reader deterministically so the file handle is released
            // as soon as the text has been read, instead of waiting for finalization.
            using (StreamReader stReader2 = new StreamReader(dlg.FileName))
            {
                textBox1.Text = stReader2.ReadToEnd();
            }
        }

        /// <summary>
        /// Click handler for <c>button3</c>: tokenizes the text of <c>textBox1</c> and
        /// <c>textBox2</c>, finds the tokens common to both, appends each common token
        /// with its occurrence count in either text to <c>textBox3</c>, and writes a
        /// similarity score computed from those counts to <c>textBox4</c>.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">The routed event data (not used).</param>
        private void button3_Click(object sender, RoutedEventArgs e)
        {
      
            // Copy textBox1's text character by character; s ends up equal to textBox1.Text.
            StringBuilder str = new StringBuilder();
            for(int a=0;a<textBox1.Text.Count();a++)
            {
                str.Append(textBox1.Text[a]);
                
            }
            string s = str.ToString();
            // Tokenize on tab, space and basic punctuation.
            // NOTE(review): '\r' and '\n' are not separators, and this Split overload keeps
            // empty entries, so "" (e.g. between ". ") is treated as a token — confirm intended.
            ArrayList gatherSplit = new ArrayList(s.Split(new char[] { '\t', ' ', '.', ',', '?', '!', ':', ';' }));
            // hs = distinct tokens of the first text.
            HashSet<string> hs = new HashSet<string>();
            for (int d = 0; d < gatherSplit.Count; d++)
            {
                hs.Add(gatherSplit[d].ToString());
            }
            // Snapshot of the distinct tokens; harr is not read again in the visible code.
            ArrayList harr = new ArrayList();
            for (int f = 0; f < hs.Count; f++)
            {
                harr.Insert(f, hs.ElementAt(f));
            }
           
            // Same steps for the second text (textBox2): copy, tokenize, de-duplicate.
            StringBuilder str1 = new StringBuilder();
            for (int t = 0; t < textBox2.Text.Count(); t++)
            {
                str1.Append(textBox2.Text[t]);
            }
            string s1 = str1.ToString();
            ArrayList gatherSplit1 = new ArrayList(s1.Split(new char[] { '\t', ' ', '.', ',', '?', '!', ':', ';' }));
            HashSet<string> hs1 = new HashSet<string>();
            for (int y = 0; y < gatherSplit1.Count; y++)
            {
                hs1.Add(gatherSplit1[y].ToString());
            }
            // Snapshot of the second text's distinct tokens; also not read again in the visible code.
            ArrayList harr1 = new ArrayList();
            for (int f1 = 0; f1 < hs1.Count; f1++)
            {
                harr1.Insert(f1, hs1.ElementAt(f1));
            }

            // interhs becomes the set of tokens present in both texts.
            // NOTE(review): the assignment below makes interhs refer to the same object as hs
            // (the freshly created set is discarded), so IntersectWith also shrinks hs.
            HashSet<string> interhs = new HashSet<string>();
            interhs = hs;
            interhs.IntersectWith(hs1);
    
            // wordfreq1[w] = number of times the w-th common token occurs in the first text,
            // stored as a string. The comparison is string ==, i.e. case-sensitive.
            ArrayList wordfreq1 = new ArrayList();
            for (int w = 0; w < interhs.Count; w++)
            {
                int z = 0;
                for (int q = 0; q < gatherSplit.Count; q++)
                {
                    if (interhs.ElementAt(w) == gatherSplit[q].ToString())
                    {
                        z += 1;
                    }

                }
                wordfreq1.Insert(w, z.ToString());
            }
           
            // wordfreq2[w1] = the same count taken over the second text.
            ArrayList wordfreq2 = new ArrayList();
            for (int w1 = 0; w1 < interhs.Count; w1++)
            {
                int z = 0;
                for (int q1 = 0; q1 < gatherSplit1.Count; q1++)
                {
                    if (interhs.ElementAt(w1) == gatherSplit1[q1].ToString())
                    {
                        z += 1;
                    }

                }
                wordfreq2.Insert(w1, z.ToString());
            }
          
            // One output line per common token: token, count in text 1, count in text 2.
            // NOTE(review): textBox3 is appended to and not cleared in the visible code,
            // so repeated clicks would accumulate lines — confirm intended.
            for (int ty = 0; ty < wordfreq1.Count; ty++)
            {
                textBox3.AppendText(interhs.ElementAt(ty) + "\t\t" + wordfreq1[ty].ToString() + "\t\t"+wordfreq2[ty].ToString()+"\r\n");
            }
         
            // Accumulate, over the common tokens only:
            //   pprodq = sum(f1 * f2), x = sum(f1 * f1), y1 = sum(f2 * f2)
            // where f1/f2 are the counts parsed back from wordfreq1/wordfreq2.
            double pprodq=0;
            double x = 0;
            double y1 = 0;
            for(int pp=0;pp<wordfreq1.Count;pp++)
            {
                pprodq=pprodq+(Convert.ToDouble(wordfreq1[pp])* Convert.ToDouble(wordfreq2[pp]));
                x = x + (Convert.ToDouble(wordfreq1[pp]) * Convert.ToDouble(wordfreq1[pp]));
                y1 = y1 + (Convert.ToDouble(wordfreq2[pp]) * Convert.ToDouble(wordfreq2[pp]));
            }
            // Score = pprodq / (x + y1 - pprodq).
            // NOTE(review): tokens that occur in only one text do not contribute to x or y1,
            // and with no common tokens this is 0.0 / 0.0, which yields NaN — confirm intended.
            double jaccard = 0;
            jaccard = pprodq / ((x + y1) - (pprodq));
            textBox4.Text = jaccard.ToString();
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.