Sign Up

Sign Up to our social questions and Answers Engine to ask questions, answer people’s questions, and connect with other people.

Have an account? Sign In

Have an account? Sign In Now

Sign In

Login to our social questions & Answers Engine to ask questions answer people’s questions & connect with other people.

Sign Up Here

Forgot Password?

Don't have account, Sign Up Here

Forgot Password

Lost your password? Please enter your email address. You will receive a link and will create a new password via email.

Have an account? Sign In Now

You must login to ask a question.

Forgot Password?

Need An Account, Sign Up Here

Please briefly explain why you feel this question should be reported.

Please briefly explain why you feel this answer should be reported.

Please briefly explain why you feel this user should be reported.

Sign InSign Up

The Archive Base

The Archive Base Logo The Archive Base Logo

The Archive Base Navigation

  • SEARCH
  • Home
  • About Us
  • Blog
  • Contact Us
Search
Ask A Question

Mobile menu

Close
Ask a Question
  • Home
  • Add group
  • Groups page
  • Feed
  • User Profile
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Buy Points
  • Users
  • Help
  • Buy Theme
  • SEARCH
Home/ Questions/Q 6098017
In Process

The Archive Base Latest Questions

Editorial Team
  • 0
Editorial Team
Asked: May 23, 20262026-05-23T13:06:32+00:00 2026-05-23T13:06:32+00:00

I have the following code: #include <stdio.h> #include <stdlib.h> #include <string.h> #include <curl/curl.h> char

  • 0

I have the following code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>

char * return_next(char *link, int rand_flag);
char* strip_parens(char* string);
char* strip_itals(char* string);
char* strip_tables(char* string);

struct MemoryStruct {
    char *memory;
    size_t size;
};


static size_t
WriteMemoryCallback(void *ptr, size_t size, size_t nmemb, void *data)
{
size_t realsize = size * nmemb;
struct MemoryStruct *mem = (struct MemoryStruct *)data;

mem->memory = realloc(mem->memory, mem->size + realsize + 1);
if (mem->memory == NULL) {
    /* out of memory! */ 
    printf("not enough memory (realloc returned NULL)\n");
    exit(EXIT_FAILURE);
}

memcpy(&(mem->memory[mem->size]), ptr, realsize);
mem->size += realsize;
mem->memory[mem->size] = 0;

return realsize;
}


int main(void)
{



char *page = malloc(1000);
page = strcpy(page, "http://en.wikipedia.org/wiki/Literature");
char *start = malloc(1000);
start = strcpy(start, page);
printf("%s\n\n", page);
int i = 0, rand_flag = 0;
while(strcmp(page, "http://en.wikipedia.org/wiki/Philosophy")){
    i++;
    page = return_next(page, rand_flag);
    printf("deep: %d, %s\n\n", i, page);
    rand_flag = 0;
}
printf("start link: %s, is %d clicks from philosophy", start, i);

return 0;

}


char * return_next(char *link, int rand_flag){
CURL *curl_handle;
struct MemoryStruct chunk;
chunk.memory = malloc(1); 
chunk.size = 0;    

curl_global_init(CURL_GLOBAL_ALL);
curl_handle = curl_easy_init();
curl_easy_setopt(curl_handle, CURLOPT_URL, link);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
if(rand_flag){
    curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1);   
}
curl_easy_perform(curl_handle);
curl_easy_cleanup(curl_handle);

char *theString = malloc(strlen(chunk.memory)+1);

char *theString1 = malloc(strlen(theString) + 1);

theString = strstr(chunk.memory, "div id=\"body");

theString1 = strip_tables(theString);

if(chunk.memory)
    free(chunk.memory);

theString = strstr(theString1, "<p>");

theString1 = strip_itals(theString);

theString = strip_parens(theString1);

curl_global_cleanup();

return theString;
}

char* strip_parens(char* string) {
long len = strlen(string);
char* result = malloc(len + 1);
int num_parens = 0;
int i, j = 0;
for(i=0; i < len; i++) {
    char c = string[i];
    if(c == '(') {
        num_parens++;
    }
    else if(c == ')' && num_parens > 0) {
        num_parens--;
    }
    else if(num_parens == 0) {
        if(c == '<'){
            if (string[i+1] == 'a'){
                if (string[i+2] == ' ') {
                    if(string[i+3] == 'h'){
                        i = i+9;

                        for(;string[i] != '"'; i++){

                            result[j] = string[i];
                            j++;
                        }
                        result[j] = '\0';
                        len = strlen("http://en.wikipedia.org");
                        char *final = malloc(j+len);
                        final = strcpy(final, "http://en.wikipedia.org");
                        return strcat(final, result);
                    }
                }
            }
        }
    }
}
result[j] = '\0';
return result;
}

char* strip_itals(char* string) {
long len = strlen(string);
char* result = malloc(len + 1);
int inside = 0;
int i, j = 0;
for(i=0; i < len; i++) {
    //printf(".%d, %c, %d\n", i, string[i], inside);
    char c = string[i];
    if(c == '<' && inside == 0) {
        if (string[i+1] == 'i'){
            if (string[i+2] == '>') {
                inside++;
                i = i+2;
            }
        }
    }
    else if(c == '<' && inside > 0) {
        //printf("first if\n");
        if (string[i+1] == '/'){
            if (string[i+2] == 'i') {
                inside--;
                i=i+3;
            }
        }
    }
    if(inside == 0) {
        result[j] = c;
        j++;
    }
}
result[j] = '\0';
return result;
}

char* strip_tables(char* string) {
//printf("%s\n", string);
long len = strlen(string);
//long len = 1000000;

char* result = malloc(len + 1);
int inside = 0;
int i, j = 0;
for(i=0; i < len; i++) {
    //printf(".%d, %c, %d\n", i, string[i], inside);
    char c = string[i];
    if(c == '<' && inside == 0) {
        if (string[i+1] == 't'){
            if (string[i+2] == 'a') {
                if (string[i+3] == 'b') {
                    if (string[i+4] == 'l') {
                        inside++;
                        i = i+4;
                    }
                }
            }
        }
    }
    else if(c == '<' && inside > 0) {
        //printf("first if\n");
        if (string[i+1] == '/'){
            if (string[i+2] == 't') {
                if (string[i+3] == 'a') {
                    if (string[i+4] == 'b') {
                        if (string[i+5] == 'l') {
                            inside--;
                            i=i+7;
                        }
                    }
                }
            }
        }
    }
    if(inside == 0) {
        result[j] = c;
        j++;
    }
}
result[j] = '\0';
return result;
}

That given a link to a wiki article will return the first link back, then in main I loop over this function till I arrive at a specified article. I ran from some random article and discovered when it passes over “Literature” it gets “Art” as the next page but when it goes to search Art curl returns a blank string- if i print(“%s”, chunk.memory) after the call I get (null). If I manually force the function to start at art it works fine, trailing all the way to philosophy. For the life of me I cant see any differences… I put some diagnostic printfs in and got the following-

this is the address ~> !http://en.wikipedia.org/wiki/Art!, rand flag = 0

With the link inbetween the exlamation marks, so I know it’s parsing the link back properly, and rand_flag is always set to 0 at the moment.

Any tips, pointers or solutions much appreciated.

  • 1 1 Answer
  • 0 Views
  • 0 Followers
  • 0
Share
  • Facebook
  • Report

Leave an answer
Cancel reply

You must login to add an answer.

Forgot Password?

Need An Account, Sign Up Here

1 Answer

  • Voted
  • Oldest
  • Recent
  • Random
  1. Editorial Team
    Editorial Team
    2026-05-23T13:06:32+00:00Added an answer on May 23, 2026 at 1:06 pm

    It is not generally possible to say anything about a program if all you have is an uncompilable piece of code. So I’m going to give some generic recommendations.

    1. Check return values of your functions.
    2. Set up callbacks to libcurl so that you can print every byte that goes in and out with a flip of a switch (much like curl -v does — look at its source if you need guidance).
    3. Sniff your network traffic.
    4. If you see that a request is not sent at all, or that it’s sent but no data is returned, you have narrowed your problem a bit.
    • 0
    • Reply
    • Share
      Share
      • Share on Facebook
      • Share on Twitter
      • Share on LinkedIn
      • Share on WhatsApp
      • Report

Sidebar

Related Questions

I have the following C-code: #include<stdio.h> #include<stdlib.h> typedef struct node { int a; }node;
I have issues with the following code: #include <stdio.h> #include <stdlib.h> #include <string.h> #include
I have the following code: #include <stdlib.h> #include <stdio.h> typedef void (*func_t)(void * data);
Suppose that we have the following bit of code: #include <pthread.h> #include <stdio.h> #include
I have the following bit of legacy C++ code that does not compile: #include
I have a C# application that includes the following code: string file = relativePath.txt;
I have the following code: String inputFile = somefile.txt; FileInputStream in = new FileInputStream(inputFile);
In the following code, I copy a string in to a char* str, which
I Have following code: Controller: public ActionResult Step1() { return View(); } [AcceptVerbs(HttpVerbs.Post)] public
I have following Code Block Which I tried to optimize in the Optimized section

Explore

  • Home
  • Add group
  • Groups page
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Users
  • Help
  • SEARCH

Footer

© 2021 The Archive Base. All Rights Reserved
With Love by The Archive Base

Insert/edit link

Enter the destination URL

Or link to existing content

    No search term specified. Showing recent items. Search or use up and down arrow keys to select an item.