diff options
Diffstat (limited to 'plugins/FTPFileYM/curl-7.29.0/docs/examples/htmltidy.c')
| -rw-r--r-- | plugins/FTPFileYM/curl-7.29.0/docs/examples/htmltidy.c | 130 | 
1 files changed, 130 insertions, 0 deletions
diff --git a/plugins/FTPFileYM/curl-7.29.0/docs/examples/htmltidy.c b/plugins/FTPFileYM/curl-7.29.0/docs/examples/htmltidy.c new file mode 100644 index 0000000000..a36e331bfe --- /dev/null +++ b/plugins/FTPFileYM/curl-7.29.0/docs/examples/htmltidy.c @@ -0,0 +1,130 @@ +/*************************************************************************** + *                                  _   _ ____  _ + *  Project                     ___| | | |  _ \| | + *                             / __| | | | |_) | | + *                            | (__| |_| |  _ <| |___ + *                             \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +/* Download a document and use libtidy to parse the HTML. + * Written by Jeff Pohlmeyer + * + * LibTidy => http://tidy.sourceforge.net + * + * gcc -Wall -I/usr/local/include tidycurl.c -lcurl -ltidy -o tidycurl + * + */ + +#include <stdio.h> +#include <tidy/tidy.h> +#include <tidy/buffio.h> +#include <curl/curl.h> + +/* curl write callback, to fill tidy's input buffer...  */ +uint write_cb(char *in, uint size, uint nmemb, TidyBuffer *out) +{ +  uint r; +  r = size * nmemb; +  tidyBufAppend( out, in, r ); +  return(r); +} + +/* Traverse the document tree */ +void dumpNode(TidyDoc doc, TidyNode tnod, int indent ) +{ +  TidyNode child; +  for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) ) +  { +    ctmbstr name = tidyNodeGetName( child ); +    if ( name ) +    { +      /* if it has a name, then it's an HTML tag ... */ +      TidyAttr attr; +      printf( "%*.*s%s ", indent, indent, "<", name); +      /* walk the attribute list */ +      for ( attr=tidyAttrFirst(child); attr; attr=tidyAttrNext(attr) ) { +        printf(tidyAttrName(attr)); +        tidyAttrValue(attr)?printf("=\"%s\" ", +                                   tidyAttrValue(attr)):printf(" "); +      } +      printf( ">\n"); +    } +    else { +      /* if it doesn't have a name, then it's probably text, cdata, etc... */ +      TidyBuffer buf; +      tidyBufInit(&buf); +      tidyNodeGetText(doc, child, &buf); +      printf("%*.*s\n", indent, indent, buf.bp?(char *)buf.bp:""); +      tidyBufFree(&buf); +    } +    dumpNode( doc, child, indent + 4 ); /* recursive */ +  } +} + + +int main(int argc, char **argv ) +{ +  CURL *curl; +  char curl_errbuf[CURL_ERROR_SIZE]; +  TidyDoc tdoc; +  TidyBuffer docbuf = {0}; +  TidyBuffer tidy_errbuf = {0}; +  int err; +  if ( argc == 2) { +    curl = curl_easy_init(); +    curl_easy_setopt(curl, CURLOPT_URL, argv[1]); +    curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); +    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); +    curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); +    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); + +    tdoc = tidyCreate(); +    tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ +    tidyOptSetInt(tdoc, TidyWrapLen, 4096); +    tidySetErrorBuffer( tdoc, &tidy_errbuf ); +    tidyBufInit(&docbuf); + +    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf); +    err=curl_easy_perform(curl); +    if ( !err ) { +      err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */ +      if ( err >= 0 ) { +        err = tidyCleanAndRepair(tdoc); /* fix any problems */ +        if ( err >= 0 ) { +          err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */ +          if ( err >= 0 ) { +            dumpNode( tdoc, tidyGetRoot(tdoc), 0 ); /* walk the tree */ +            fprintf(stderr, "%s\n", tidy_errbuf.bp); /* show errors */ +          } +        } +      } +    } +    else +      fprintf(stderr, "%s\n", curl_errbuf); + +    /* clean-up */ +    curl_easy_cleanup(curl); +    tidyBufFree(&docbuf); +    tidyBufFree(&tidy_errbuf); +    tidyRelease(tdoc); +    return(err); + +  } +  else +    printf( "usage: %s <url>\n", argv[0] ); + +  return(0); +}  | 
