My code below takes a URL inputted by the user and prints out the HTML coded content of the page. The first for() loop of the code is meant to strip out the code between the <…> in the HTML but i cant seem to get it to work, it is not printing anything.
Can anyone see if there is a problem with my code, it works fine but doesn’t remove or print anything out after the aforementioned for() loop.
#include <curl/curl.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <curl/curl.h>
#include <curl/types.h>
#include <curl/easy.h>
#include <string.h>
/*
*
*/
size_t write_data(void *ptr, size_t size, size_t nmeb, void *stream) {
return fwrite(ptr, size, nmeb, stream);
}
/* function prototypes to define later */
char *do_web_request(char *url);
size_t static write_callback_func(void *buffer,
size_t size,
size_t nmemb,
void *userp);
int main(int argc, char** argv) {
char str[100];
int len;
printf("Enter the website URL:\n");
scanf("%s", str);
char *content = NULL;
content = do_web_request(str);
len = strlen(str);
int i;
int idx = 0;
int opened = 0;
printf("Original content::");
printf("%s", content);
printf("\nReplaced String ");
for (i = 0; i < len; i++) {
if (content[i] == '<') {
opened = 1;
} else if (content[i] == '>') {
opened = 0;
} else if (!opened) {
content[idx++] = content[i];
}
}
content[idx] = '\0';
printf("%s\n", content);
return (EXIT_SUCCESS);
}
/* the function to return the content for a url */
char *do_web_request(char *str) {
/* keeps the handle to the curl object */
CURL *curl_handle = NULL;
/* to keep the response */
char *response = NULL;
/* initializing curl and setting the url */
curl_handle = curl_easy_init();
curl_easy_setopt(curl_handle, CURLOPT_URL, str);
curl_easy_setopt(curl_handle, CURLOPT_HTTPGET, 1);
/* follow locations specified by the response header */
curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1);
/* setting a callback function to return the data */
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_callback_func);
/* passing the pointer to the response as the callback parameter */
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &response);
/* perform the request */
curl_easy_perform(curl_handle);
/* cleaning all curl stuff */
curl_easy_cleanup(curl_handle);
return response;
}
/* the function to invoke as the data recieved */
size_t static write_callback_func(void *buffer,
size_t size,
size_t nmemb,
void *userp) {
char **response_ptr = (char**) userp;
/* assuming the response is a string */
*response_ptr = strndup(buffer, (size_t) (size * nmemb));
}
I think this:
should be:
Otherwise, you won’t be correctly examining the returned HTML as the
lenwould be thelenof the URL entered by the user.