Could you post a sample HTML input and the desired output?
radoulov
Junior Poster in Training
54 posts since Feb 2007
Reputation Points: 10
Solved Threads: 5
If you have GNU Awk:
awk '
# Requires GNU awk: the three-argument match() stores the parenthesized
# groups of the regex in the array given as third argument.
#
# nlinks and nimages are never initialized on purpose: when nothing was
# found, the count prints as an empty string after the label.

# Links: for every "<a href=...>" in the record keep what follows "href=".
{
    rest = $0
    while (match(rest, /<a href=([^>]*)>/, cap)) {
        links[++nlinks] = cap[1]
        # Continue scanning right after the tag that was just matched.
        rest = substr(rest, RSTART + RLENGTH)
    }
}

# Images: for every "<img...>" in the record keep what follows "<img".
{
    rest = $0
    while (match(rest, /<img([^>]*)>/, cap)) {
        images[++nimages] = cap[1]
        rest = substr(rest, RSTART + RLENGTH)
    }
}

# Report: links first, then images, each in the order they were seen.
END {
    print "#links:", nlinks
    for (k = 1; k <= nlinks; k++)
        print links[k]

    print "#images:", nimages
    for (k = 1; k <= nimages; k++)
        print images[k]
}' html
radoulov
Junior Poster in Training
54 posts since Feb 2007
Reputation Points: 10
Solved Threads: 5
You could:
awk '
# Requires GNU awk: the three-argument match() stores the parenthesized
# groups of the regex in the array given as third argument.
#
# nlinks and nimages are never initialized on purpose: when nothing was
# found, the count prints as an empty string after the label.

# One scan per record: a single regex recognizes both tag kinds, group 2
# holds the text between the tag prefix and the closing ">".
{
    rest = $0
    while (match(rest, /<(a href=|img)([^>]*)>/, cap)) {
        # The first two characters of the match tell the tag kinds apart.
        head = substr(rest, RSTART, 2)
        if (head == "<a")
            links[++nlinks] = cap[2]
        else
            images[++nimages] = cap[2]
        # Continue scanning right after the tag that was just matched.
        rest = substr(rest, RSTART + RLENGTH)
    }
}

# Report: links first, then images, each in the order they were seen.
END {
    print "#links:", nlinks
    for (k = 1; k <= nlinks; k++)
        print links[k]

    print "#images:", nimages
    for (k = 1; k <= nimages; k++)
        print images[k]
}' html
Or, since you insist on using a variable :)
awk '
# Requires GNU awk: the three-argument match() stores the parenthesized
# groups of the regex in the array given as third argument.
#
# nlinks and nimages are never initialized on purpose: when nothing was
# found, the count prints as an empty string after the label.

# The pattern lives in a variable (a dynamic regex) instead of a literal.
BEGIN {
    tag = "<(a href=|img)([^>]*)>"
}

# One scan per record; group 2 holds the text between the tag prefix and
# the closing ">".
{
    rest = $0
    while (match(rest, tag, cap)) {
        # The first two characters of the match tell the tag kinds apart.
        head = substr(rest, RSTART, 2)
        if (head == "<a")
            links[++nlinks] = cap[2]
        else
            images[++nimages] = cap[2]
        # Continue scanning right after the tag that was just matched.
        rest = substr(rest, RSTART + RLENGTH)
    }
}

# Report: links first, then images, each in the order they were seen.
END {
    print "#links:", nlinks
    for (k = 1; k <= nlinks; k++)
        print links[k]

    print "#images:", nimages
    for (k = 1; k <= nimages; k++)
        print images[k]
}' html
Or even:
awk '
# Requires GNU awk: the three-argument match() stores the parenthesized
# groups of the regex in the array given as third argument.
#
# nlinks and nimages are never initialized on purpose: when nothing was
# found, the count prints as an empty string after the label.

# Build the pattern from its two tag prefixes so each prefix is written
# only once and can be reused for the classification below.
BEGIN {
    lnk = "a href="
    img = "img"
    tag = "<(" lnk "|" img ")([^>]*)>"
}

# One scan per record; group 2 holds the text between the tag prefix and
# the closing ">".
{
    rest = $0
    while (match(rest, tag, cap)) {
        hit = substr(rest, RSTART, RLENGTH)
        # Concatenation binds tighter than "~"; the parentheses only make
        # that explicit.
        if (hit ~ ("^<" lnk))
            links[++nlinks] = cap[2]
        else
            images[++nimages] = cap[2]
        # Continue scanning right after the tag that was just matched.
        rest = substr(rest, RSTART + RLENGTH)
    }
}

# Report: links first, then images, each in the order they were seen.
END {
    print "#links:", nlinks
    for (k = 1; k <= nlinks; k++)
        print links[k]

    print "#images:", nimages
    for (k = 1; k <= nimages; k++)
        print images[k]
}' html
radoulov
Junior Poster in Training
54 posts since Feb 2007
Reputation Points: 10
Solved Threads: 5
Did you test your code?
Do you get the expected result?
radoulov
Junior Poster in Training
54 posts since Feb 2007
Reputation Points: 10
Solved Threads: 5