/// <summary>
/// Click handler for <c>button1</c>; it simply invokes <see cref="test"/>.
/// </summary>
/// <param name="sender">The control that raised the event (not used here).</param>
/// <param name="e">Event data supplied with the click (not used here).</param>
private void button1_Click(object sender, EventArgs e)
{
    // All of the work lives in test(); the handler only forwards the click.
    this.test();
}
/// <summary>
/// Downloads the page at http://www.google.com/adplanner/static/top1000/, walks every
/// <c>&lt;a&gt;</c> element in it and builds a dictionary that maps a host-derived name
/// to the link's <c>href</c> value.
/// </summary>
/// <remarks>
/// Keys are the link's host with a leading "www." removed (for example "example.com").
/// When a host has already been recorded with a different URL, the position of the
/// current anchor in the page is appended to the key ("example.com#17") so that
/// <see cref="Dictionary{TKey,TValue}.Add"/> never throws on a duplicate key.
/// Anchors without an <c>href</c>, and hrefs that are not absolute URLs with a host,
/// are skipped.
/// NOTE(review): the dictionary is a local and is not returned or stored, so callers
/// currently cannot observe the result.
/// </remarks>
public void test()
{
    Dictionary<string, string> LnksDict = new Dictionary<string, string>();
    using (SmartWebClient smwc = new SmartWebClient())
    {
        HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
        htmlDoc.LoadHtml(smwc.DownloadString("http://www.google.com/adplanner/static/top1000/"));

        // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
        var anchors = htmlDoc.DocumentNode.SelectNodes("//a");
        if (anchors == null)
        {
            return;
        }

        int position = -1;
        foreach (var link in anchors)
        {
            // Count every anchor, including skipped ones, so the suffix is the
            // anchor's index within the whole collection.
            position++;

            var hrefAttribute = link.Attributes["href"];
            if (hrefAttribute == null || string.IsNullOrEmpty(hrefAttribute.Value))
            {
                continue;
            }

            string UrlVal = hrefAttribute.Value;

            // Let Uri extract the host instead of guessing with Split('.'), which
            // breaks on "example.com" and on hrefs that contain no dot at all.
            Uri parsed;
            if (!Uri.TryCreate(UrlVal, UriKind.Absolute, out parsed) || string.IsNullOrEmpty(parsed.Host))
            {
                continue;
            }

            string name = parsed.Host;
            if (name.StartsWith("www.", StringComparison.OrdinalIgnoreCase))
            {
                name = name.Substring(4);
            }

            string existing;
            if (!LnksDict.TryGetValue(name, out existing))
            {
                LnksDict.Add(name, UrlVal);
            }
            else if (!string.Equals(existing, UrlVal, StringComparison.Ordinal))
            {
                // '#' cannot occur in a host name and position is unique per anchor,
                // so the suffixed key cannot clash with any other key.
                LnksDict[name + "#" + position] = UrlVal;
            }
        }
    }
}
#region <<=========== SmWbCl ============>>
/// <summary>
/// A <see cref="WebClient"/> that, for HTTP requests, sets the connection limit of the
/// request's service point to a configurable value.
/// </summary>
public class SmartWebClient : WebClient
{
    // Connection limit applied to each HTTP request's ServicePoint; 0 disables all tweaks.
    private readonly int maxConcurentConnectionCount;

    /// <summary>
    /// Creates the client.
    /// </summary>
    /// <param name="maxConcurentConnectionCount">
    /// Value assigned to <c>ServicePoint.ConnectionLimit</c> for each HTTP request.
    /// Passing 0 leaves the request, the proxy and the encoding untouched.
    /// </param>
    public SmartWebClient(int maxConcurentConnectionCount = 20)
    {
        this.maxConcurentConnectionCount = maxConcurentConnectionCount;
    }

    /// <summary>
    /// Builds the request for <paramref name="address"/> and, when it is an HTTP request
    /// and the configured limit is non-zero, applies the connection limit, clears the
    /// client's proxy and switches the client's encoding to UTF-8.
    /// </summary>
    /// <param name="address">The URI the request is created for.</param>
    /// <returns>
    /// The request produced by the base class; null is passed through, and non-HTTP
    /// requests (for example file:// or ftp://) are returned unmodified.
    /// </returns>
    protected override WebRequest GetWebRequest(Uri address)
    {
        WebRequest request = base.GetWebRequest(address);

        // A hard cast would throw InvalidCastException for non-HTTP schemes; use "as"
        // so those requests (and null) are handed back unchanged.
        var httpWebRequest = request as HttpWebRequest;
        if (httpWebRequest == null)
        {
            return request;
        }

        if (maxConcurentConnectionCount != 0)
        {
            // NOTE(review): the request above has already been created, so presumably
            // clearing the client's proxy here only affects later requests - confirm.
            this.Proxy = null;
            this.Encoding = Encoding.GetEncoding("UTF-8");
            httpWebRequest.ServicePoint.ConnectionLimit = maxConcurentConnectionCount;
        }

        return httpWebRequest;
    }
}
#endregion
In this code I am trying to build a list of URLs to be used as an autocomplete source later.
What I am doing wrong is not checking a condition before adding the parsed values to the dictionary.
I need to find a way to add the domain name as the key even if it already exists,
so I would like to be able to apply this condition:
if the key already exists in the dictionary, append the collection index of the current link to the key string as a suffix.
Or, if you would like to suggest a different solution altogether, I will be happy to see a new example.
thanks
I think what you want, rather than a
Dictionary<string, string>, is a Dictionary<string, HashSet<string>>. That way, you can build a list of URLs for each domain. Your code to add an item would fetch the HashSet<string> for the host with TryGetValue (creating and storing a new set if there is none yet) and then call Add(url) on it. The key here is that calling
Add on a HashSet when the item is already there won't throw an exception. It just doesn't add it again and returns false to indicate that the item was already in the collection. When you're done, you have a list of URLs for each host (domain), which you can then use for your auto completion.
By the way, your method of splitting out the host using
Split('.') isn't going to work very well. It assumes domains of the form "www.example.com". If you run into, for example, "example.com" (without the "www"), you're going to get "com" for the name. Also, "www.example.com" is going to collide with "www.example.org" and "www.example.co.uk". You need a better way of identifying hosts, such as parsing the URL with System.Uri and using its Host property.