I have this html table:
<div style="width: 398px;">
<table cellpadding="4" cellspacing="0" width="100%" style="border: 2px solid #ffffff;">
<tbody>
<tr class="parrafo-ver-negro">
<th class="borde-abajo borde-derecha" style="text-align: left;">Parada Nº</th>
<th class="borde-abajo" style="text-align: left;">Destino</th>
</tr>
<tr>
<td class="borde-derecha"><a class="parrafo-ver-negro link-nro-sms">5729</a></td>
<td class="parrafo_ver-gris letra9">103 NEGRO > Villa Gdor. Galvez<br>103 ROJO > Villa Gdor. Gálvez<br></td>
</tr>
<tr>
<td class="borde-derecha"><a class="parrafo-ver-negro link-nro-sms">5292</a></td>
<td class="parrafo_ver-gris letra9">103 NEGRO > Granadero Baigorria<br>103 ROJO > Mini Terminal Barrio Rucci<br></td>
</tr>
</tbody>
</table>
</div>
My code is this:
public static List<Parada> parsear(string html)
{
int cual;
int _parada;
string _destino;
List<Parada> lp = new List<Parada>();
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);
foreach (HtmlNode tabla in doc.DocumentNode.SelectNodes("//table"))
{
foreach (HtmlNode fila in tabla.SelectNodes("//tr"))
{
cual = 1; _parada = 0; _destino = "";
foreach (HtmlNode celda in fila.SelectNodes("//td"))
{
if (cual == 1)
{
_parada = Int32.Parse(celda.InnerText);
cual = 2;
}
else if (cual == 2)
{
_destino = celda.InnerText;
cual = 1;
lp.Add(new Parada(_parada, _destino));
}
}
}
}
return lp;
}
I should end up with two elements of List of Parada (the first TR I have to ignore, since it’s the header), but instead I have 6, instead of two.
What’s wrong with the code?
I’ve tried many things but with the same result.
Thanks.
the fila.SelectNodes still iterates through the entire document so you iterating through all 4
tdnodes 3 times (once pertrnode). Try this code instead…