Here's a representation.
I massage the data a little before running it through the Regex. How you will eventually modify this will depend on how the server delivers the html page.
Imports System
Imports System.Collections.Generic
Imports System.IO
Imports System.Linq
Imports System.Net
Imports System.Text.RegularExpressions
Module DW_396137
' Downloads the DaniWeb stats page, collects the absolute URL of every
' member-profile link found in it, and prints "Finished" on success.
' On any failure the exception message is printed and the Sub returns early.
Sub Main()
    Dim strURL As String = "http://www.daniweb.com/certificates/stats.php"
    ' Captures the relative link (/members/<name>/<id>) from an href attribute.
    Dim rxMemberLinks As New Regex("href=.(?<mem_link>/members/.{1,20}/\d{1,}).>")
    Dim lst_strFullLinks As New List(Of String)
    Try
        ' Using blocks release the connection and the reader even when the download throws.
        Using wc As New WebClient()
            Using fileIn As New StreamReader(wc.OpenRead(strURL))
                ' Split on "<" so that each fragment holds at most one tag.
                ' Note: VB string literals have no escape sequences, so "\n" would be
                ' a backslash followed by the letter n, not a newline; splitting on
                ' its characters would cut member names apart at every "n".
                Dim fragments As String() = fileIn.ReadToEnd() _
                    .Split(New Char() {"<"c}, StringSplitOptions.RemoveEmptyEntries)
                For Each fragment As String In fragments
                    ' Match once and reuse the result instead of IsMatch + Match.
                    Dim linkMatch As Match = rxMemberLinks.Match(fragment)
                    If linkMatch.Success Then
                        lst_strFullLinks.Add("http://www.DaniWeb.com" & linkMatch.Groups("mem_link").Value)
                    End If
                Next
            End Using
        End Using
    Catch exc As Exception
        Console.WriteLine("Exception: " & exc.Message)
        Return
    End Try
    Console.WriteLine("Finished")
End Sub
End Module
thines01
Postaholic
Team Colleague
2,425 posts since Oct 2009
Reputation Points: 445
Solved Threads: 402