public partial class Form1 : Form { int y = 0; string url =

Question

0

Asked: June 4, 20262026-06-04T08:59:51+00:00 2026-06-04T08:59:51+00:00

public partial class Form1 : Form { int y = 0; string url =

0

public partial class Form1 : Form
{
   int y = 0;
   string url = @"http://www.google.co.il";
   string urls = @"http://www.bing.com/images/search?q=cat&go=&form=QB&qs=n";

   public Form1()
   {
       InitializeComponent();
       //webCrawler(urls, 3);
       List<string> a = webCrawler(urls, 1);
       //GetAllImages();
   }

   private int factorial(int n)
   {
      if (n == 0) return 1;
      else y = n * factorial(n - 1);
      listBox1.Items.Add(y);
      return y;
   }

   private List<string> getLinks(HtmlAgilityPack.HtmlDocument document)
   {
       List<string> mainLinks = new List<string>();

       if (document.DocumentNode.SelectNodes("//a[@href]") == null)
       { }

       foreach (HtmlNode link in document.DocumentNode.SelectNodes("//a[@href]"))
       {
           var href = link.Attributes["href"].Value;
           mainLinks.Add(href);
       }

       return mainLinks;
   }

   private List<string> webCrawler(string url, int levels)
   {
      HtmlAgilityPack.HtmlDocument doc;
      HtmlWeb hw = new HtmlWeb(); 

      List<string> webSites;// = new List<string>();
      List<string> csFiles = new List<string>();

      csFiles.Add("temp string to know that something is happening in level = " + levels.ToString());
      csFiles.Add("current site name in this level is : "+url);
      /* later should be replaced with real cs files .. cs files links..*/

      doc = hw.Load(url);
      webSites = getLinks(doc);

      if (levels == 0)
      {
         return csFiles;
      }
      else
      {
         int actual_sites = 0;

         for (int i = 0; i < webSites.Count() && i< 100000; i++) // limiting ourseleves for 20 sites for each level for now..
         //or it will take forever.
         {
             string t = webSites[i];
             /*
                    if (!webSites.Contains(t))
                    {
                        webCrawler(t, levels - 1);
                    }
             */

             if ( (t.StartsWith("http://")==true) || (t.StartsWith("https://")==true) ) // replace this with future FilterJunkLinks function
             {
                actual_sites++;
                csFiles.AddRange(webCrawler(t, levels - 1));
                richTextBox1.Text += t + Environment.NewLine;
             }
          }

          // report to a message box only at high levels..
          if (levels==1)
             MessageBox.Show(actual_sites.ToString());

          return csFiles;
       }                
    }

The exception is thrown after a few sites have been sent to the getLinks function.

The exception is in the getLinks function on the line:

foreach (HtmlNode link in document.DocumentNode.SelectNodes("//a[@href]"))

Object reference not set to an instance of an object

I tried to use there IF to check if its null then I did return mainLinks; which is a list.

But if I’m doing it I’m not getting all the links from the website.

Now I’m using urls in the constructor if I’m using url (www.google.co.il) I’m getting the same exception after few seconds.

I can’t figure out why this exception is throw up. Is there any reason for this exception ?

System.NullReferenceException was unhandled
Message=Object reference not set to an instance of an object.
Source=GatherLinks
StackTrace:
at GatherLinks.Form1.getLinks(HtmlDocument document) in D:\C-Sharp\GatherLinks\GatherLinks\GatherLinks\Form1.cs:line 55
at GatherLinks.Form1.webCrawler(String url, Int32 levels) in D:\C-Sharp\GatherLinks\GatherLinks\GatherLinks\Form1.cs:line 76
at GatherLinks.Form1.webCrawler(String url, Int32 levels) in D:\C-Sharp\GatherLinks\GatherLinks\GatherLinks\Form1.cs:line 104
at GatherLinks.Form1..ctor() in D:\C-Sharp\GatherLinks\GatherLinks\GatherLinks\Form1.cs:line 29
at GatherLinks.Program.Main() in D:\C-Sharp\GatherLinks\GatherLinks\GatherLinks\Program.cs:line 18
at System.AppDomain._nExecuteAssembly(Assembly assembly, String[] args)
at System.AppDomain.ExecuteAssembly(String assemblyFile, Evidence assemblySecurity, String[] args)
at Microsoft.VisualStudio.HostingProcess.HostProc.RunUsersAssembly()
at System.Threading.ThreadHelper.ThreadStart_Context(Object state)
at System.Threading.ExecutionContext.Run(ExecutionContext executionContext, ContextCallback callback, Object state)
at System.Threading.ThreadHelper.ThreadStart()

Report

Leave an answer
Cancel reply

You must login to add an answer.

Need An Account,

1 Answer

Editorial Team · Answer 1 · 2026-06-04T08:59:52+00:00

The problem appears to be that you’re testing for null but then doing nothing about it – here

            if (document.DocumentNode.SelectNodes("//a[@href]") == null)
            {
            }

I suspect you want to handle the null case but haven’t written the code to do it. You probably want something like:

    private List<string> getLinks(HtmlAgilityPack.HtmlDocument document)
        {
           List<string> mainLinks = new List<string>();
           if (document.DocumentNode.SelectNodes("//a[@href]") != null)
            {

                foreach (HtmlNode link in document.DocumentNode.SelectNodes("//a[@href]"))
                {
                    var href = link.Attributes["href"].Value;
                    mainLinks.Add(href);
                }
            }
            return mainLinks;
        }

you’d probably want to tidy up to something more like:

   private List<string> getLinks(HtmlAgilityPack.HtmlDocument document)
        {
           List<string> mainLinks = new List<string>();
           var linkNodes = document.DocumentNode.SelectNodes("//a[@href]");
           if (linkNodes != null)
            {
                foreach (HtmlNode link in linkNodes)
                {
                    var href = link.Attributes["href"].Value;
                    mainLinks.Add(href);
                }
            }
            return mainLinks;
        }

Sign Up

Sign In

Forgot Password

The Archive Base Latest Questions

public partial class Form1 : Form { int y = 0; string url =

Leave an answerCancel reply

1 Answer

Leave an answer
Cancel reply