cURL
Haxx ad
libcurl

Shopping cart software, Online file storage, Online photo storage, Hosted shopping cart, Contact management software, Email marketing software, Project management software, Issue tracking software, Online notepad, Web publishing software

curl's project page on SourceForge.net

Sponsors:
Haxx

cURL > libcurl > C API index > Example Source Codes > htmltidy.c

htmltidy.c

Download htmltidy.c
/*****************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * $Id: htmltidy.c,v 1.2 2008-05-22 21:20:09 danf Exp $
 *
 * Download a document and use libtidy to parse the HTML.
 * Written by Jeff Pohlmeyer
 *
 * LibTidy => http://tidy.sourceforge.net
 *
 * gcc -Wall -I/usr/local/include htmltidy.c -lcurl -ltidy -o htmltidy
 *
 */ 
 
#include <stdio.h>
#include <tidy/tidy.h>
#include <tidy/buffio.h>
#include <curl/curl.h>
 
/* curl write callback, to fill tidy's input buffer...  */ 
uint write_cb(char *in, uint size, uint nmemb, TidyBuffer *out)
{
  uint r;
  r = size * nmemb;
  tidyBufAppend( out, in, r );
  return(r);
}
 
/*
 * Traverse the document tree below 'tnod' and print it to stdout.
 *
 *   doc     - the tidy document the nodes belong to
 *   tnod    - node whose children are dumped (the node itself is not)
 *   indent  - column width used for this level; every nesting level adds 4
 *
 * Named nodes are printed as "<name attr="value" ... >", one per line;
 * unnamed nodes (text, cdata, ...) are printed as their text content.
 */ 
void dumpNode(TidyDoc doc, TidyNode tnod, int indent )
{
  TidyNode child;
  for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) )
  {
    ctmbstr name = tidyNodeGetName( child );
    if ( name )
    {
      /* if it has a name, then it's an HTML tag ... */ 
      TidyAttr attr;
      /* Field width only: a precision of 'indent' would print an empty
         string instead of "<" when indent is 0. */ 
      printf( "%*s%s ", indent, "<", name);
      /* walk the attribute list */ 
      for ( attr=tidyAttrFirst(child); attr; attr=tidyAttrNext(attr) ) {
        ctmbstr attr_name = tidyAttrName(attr);
        ctmbstr attr_value = tidyAttrValue(attr);
        /* The name comes from the downloaded document: never use it as the
           format string itself. */ 
        printf("%s", attr_name ? attr_name : "");
        if ( attr_value )
          printf("=\"%s\" ", attr_value);
        else
          printf(" ");
      }
      printf( ">\n");
    }
    else {
      /* if it doesn't have a name, then it's probably text, cdata, etc... */ 
      TidyBuffer buf;
      tidyBufInit(&buf);
      tidyNodeGetText(doc, child, &buf);
      /* Pad with 'indent' spaces, then print the whole text; using 'indent'
         as the precision would cut the text down to 'indent' characters. */ 
      printf("%*s%s\n", indent, "", buf.bp?(char *)buf.bp:"");
      tidyBufFree(&buf);
    }
    dumpNode( doc, child, indent + 4 ); /* recursive */ 
  }
}
 
 
int main(int argc, char **argv )
{
  CURL *curl;
  char curl_errbuf[CURL_ERROR_SIZE];
  TidyDoc tdoc;
  TidyBuffer docbuf = {0};
  TidyBuffer tidy_errbuf = {0};
  int err;
  if ( argc == 2) {
    curl = curl_easy_init();
    curl_easy_setopt(curl, CURLOPT_URL, argv[1]);
    curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
    curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
 
    tdoc = tidyCreate();
    tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ 
    tidyOptSetInt(tdoc, TidyWrapLen, 4096);
    tidySetErrorBuffer( tdoc, &tidy_errbuf );
    tidyBufInit(&docbuf);
 
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
    err=curl_easy_perform(curl);
    if ( !err ) {
      err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */ 
      if ( err >= 0 ) {
        err = tidyCleanAndRepair(tdoc); /* fix any problems */ 
        if ( err >= 0 ) {
          err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */ 
          if ( err >= 0 ) {
            dumpNode( tdoc, tidyGetRoot(tdoc), 0 ); /* walk the tree */ 
            fprintf(stderr, "%s\n", tidy_errbuf.bp); /* show errors */ 
          }
        }
      }
    }
    else
      fprintf(stderr, "%s\n", curl_errbuf);
 
    /* clean-up */ 
    curl_easy_cleanup(curl);
    tidyBufFree(&docbuf);
    tidyBufFree(&tidy_errbuf);
    tidyRelease(tdoc);
    return(err);
 
  }
  else
    printf( "usage: %s <url>\n", argv[0] );
 
  return(0);
}

All Examples

10-at-a-time.c
anyauthput.c
cacertinmem.c
certinfo.c
chkspeed.c
cookie_interface.c
curlgtk.c
curlx.c
debug.c
evhiperfifo.c
fileupload.c
fopen.c
ftpget.c
ftpgetinfo.c
ftpgetresp.c
ftpupload.c
ftpuploadresume.c
getinfo.c
getinmemory.c
ghiper.c
hiperfifo.c
htmltidy.c
http-post.c
httpcustomheader.c
httpput.c
https.c
multi-app.c
multi-debugcallback.c
multi-double.c
multi-post.c
multi-single.c
multithread.c
opensslthreadlock.c
persistant.c
post-callback.c
postit2.c
sampleconv.c
sendrecv.c
sepheaders.c
simple.c
simplepost.c
simplessl.c
smooth-gtk-thread.c
synctime.c
threaded-ssl.c

You'll also find all examples in the distribution archive, in the docs/examples directory.

donate! Page updated January 04, 2010.
web site info

File upload with ASP.NET