ADC Home > Reference Library > Reference > Darwin > Miscellaneous User Space API Reference
|
tidy.h - Defines HTML Tidy API implemented by tidy library. |
Includes: |
Public interface is const-correct and doesn't explicitly depend
on any globals. Thus, thread-safety may be introduced w/out
changing the interface.
Looking ahead to a C++ wrapper, C functions always pass
this-equivalent as 1st arg.
Copyright (c) 1998-2004 World Wide Web Consortium
(Massachusetts Institute of Technology, European Research
Consortium for Informatics and Mathematics, Keio University).
All Rights Reserved.
CVS Info :
$Author: rbraun $
$Date: 2004/05/04 20:05:14 $
$Revision: 1.1.1.1 $
Contributing Author(s):
Dave Raggett
The contributing author(s) would like to thank all those who
helped with testing, bug fixes and suggestions for improvements.
This wouldn't have been possible without your help.
COPYRIGHT NOTICE:
This software and documentation is provided "as is," and
the copyright holders and contributing author(s) make no
representations or warranties, express or implied, including
but not limited to, warranties of merchantability or fitness
for any particular purpose or that the use of the software or
documentation will not infringe any third party patents,
copyrights, trademarks or other rights.
The copyright holders and contributing author(s) will not be held
liable for any direct, indirect, special or consequential damages
arising out of any use of the software or documentation, even if
advised of the possibility of such damage.
Permission is hereby granted to use, copy, modify, and distribute
this source code, or portions hereof, documentation and executables,
for any purpose, without fee, subject to the following restrictions:
The copyright holders and contributing author(s) specifically
permit, without fee, and encourage the use of this source code
as a component for supporting the Hypertext Markup Language in
commercial products. If you use this source code in a product,
acknowledgment is not required but would be appreciated.
Created 2001-05-20 by Charles Reitzel
Updated 2002-07-01 by Charles Reitzel - 1st Implementation
opaque_type( TidyAttr) |
- opaque_type
- opaque_type
/** @struct TidyAttr ** Opaque attribute datatype */ opaque_type( TidyAttr );
Opaque attribute datatype
opaque_type( TidyDoc) |
/** @struct TidyDoc ** Opaque document datatype */ opaque_type( TidyDoc );
Opaque document datatype. Member of the "Opaque Types" group: these types are cast to implementation types within the library, which reduces inter-dependencies/conflicts with application code.
opaque_type( TidyNode) |
- opaque_type
- opaque_type
/** @struct TidyNode ** Opaque node datatype */ opaque_type( TidyNode );
Opaque node datatype
opaque_type( TidyOption) |
- opaque_type
- opaque_type
opaque_type( TidyOption );
Opaque option datatype
tidyAccessWarningCount |
TIDY_EXPORT uint tidyAccessWarningCount( TidyDoc tdoc );
Number of Tidy accessibility warnings encountered.
tidyAttrGetHREF |
/** @defgroup AttrGet Attribute Retrieval ** ** Lookup an attribute from a given node ** @{ */ TIDY_EXPORT TidyAttr tidyAttrGetHREF( TidyNode tnod );
Lookup an attribute from a given node (Attribute Retrieval group).
tidyAttrGetId |
/** @defgroup Attribute Attribute Interrogation ** ** Get information about any given attribute. ** @{ */ TIDY_EXPORT TidyAttrId tidyAttrGetId( TidyAttr tattr );
Get the ID of the given attribute (Attribute Interrogation group: get information about any given attribute).
tidyCleanAndRepair |
/** @defgroup Clean Diagnostics and Repair ** ** @{ */ /** Execute configured cleanup and repair operations on parsed markup */ TIDY_EXPORT int tidyCleanAndRepair( TidyDoc tdoc );
Execute configured cleanup and repair operations on parsed markup.
tidyConfigErrorCount |
TIDY_EXPORT uint tidyConfigErrorCount( TidyDoc tdoc );
Number of Tidy configuration errors encountered.
tidyCreate |
TIDY_EXPORT TidyDoc tidyCreate( void);
Basic Operations group: the Tidy public interface.
Several functions return an integer document status:
  0  -> SUCCESS
  >0 -> 1 == TIDY WARNING, 2 == TIDY ERROR
  <0 -> SEVERE ERROR
The following is a short example program.
#include <tidy.h>
#include <buffio.h>
#include <stdio.h>
#include <errno.h>

int main(int argc, char **argv )
{
  const char* input = "<title>Foo</title><p>Foo!";
  TidyBuffer output = {0};
  TidyBuffer errbuf = {0};
  int rc = -1;
  Bool ok;

  TidyDoc tdoc = tidyCreate();                     // Initialize "document"
  printf( "Tidying:\t%s\n", input );

  ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );  // Convert to XHTML
  if ( ok )
    rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
  if ( rc >= 0 )
    rc = tidyParseString( tdoc, input );           // Parse the input
  if ( rc >= 0 )
    rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
  if ( rc >= 0 )
    rc = tidyRunDiagnostics( tdoc );               // Kvetch
  if ( rc > 1 )                                    // If error, force output.
    rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
  if ( rc >= 0 )
    rc = tidySaveBuffer( tdoc, &output );          // Pretty Print

  if ( rc >= 0 )
  {
    if ( rc > 0 )
      printf( "\nDiagnostics:\n\n%s", errbuf.bp );
    printf( "\nAnd here is the result:\n\n%s", output.bp );
  }
  else
    printf( "A severe error (%d) occurred.\n", rc );

  tidyBufFree( &output );
  tidyBufFree( &errbuf );
  tidyRelease( tdoc );
  return rc;
}
tidyDetectedGenericXml |
TIDY_EXPORT Bool tidyDetectedGenericXml( TidyDoc tdoc );
Input is generic XML (not HTML or XHTML)?
tidyDetectedHtmlVersion |
TIDY_EXPORT int tidyDetectedHtmlVersion( TidyDoc tdoc );
Detected HTML version: 0, 2, 3 or 4
tidyDetectedXhtml |
TIDY_EXPORT Bool tidyDetectedXhtml( TidyDoc tdoc );
Input is XHTML?
tidyErrorCount |
TIDY_EXPORT uint tidyErrorCount( TidyDoc tdoc );
Number of Tidy errors encountered. If > 0, output is suppressed unless TidyForceOutput is set.
tidyErrorSummary |
TIDY_EXPORT void tidyErrorSummary( TidyDoc tdoc );
Write more complete information about errors to current error sink.
tidyGeneralInfo |
TIDY_EXPORT void tidyGeneralInfo( TidyDoc tdoc );
Write more general information about markup to current error sink.
tidyGetAppData |
TIDY_EXPORT ulong tidyGetAppData( TidyDoc tdoc );
Get the application data previously set with tidySetAppData.
tidyGetByte |
TIDY_EXPORT uint tidyGetByte( TidyInputSource*source );
Helper: get next byte from input source
tidyGetNextOption |
TIDY_EXPORT TidyOption tidyGetNextOption( TidyDoc tdoc, TidyIterator*pos );
Get next Option
tidyGetOption |
TIDY_EXPORT TidyOption tidyGetOption( TidyDoc tdoc, TidyOptionId optId );
Lookup option by ID
tidyGetOptionByName |
TIDY_EXPORT TidyOption tidyGetOptionByName( TidyDoc tdoc, ctmbstr optnam );
Lookup option by name
tidyGetOptionList |
/** Example:
<pre>
  TidyIterator itOpt = tidyGetOptionList( tdoc );
  while ( itOpt )
  {
    TidyOption opt = tidyGetNextOption( tdoc, &itOpt );
    .. get/set option values ..
  }
</pre>
*/
TIDY_EXPORT TidyIterator tidyGetOptionList( TidyDoc tdoc );
Get iterator for list of options
tidyGetRoot |
/** @defgroup Tree Document Tree
**
** A parsed and, optionally, repaired document is
** represented by Tidy as a Tree, much like a W3C DOM.
** This tree may be traversed using these functions.
** The following snippet gives a basic idea how these
** functions can be used.
**
<pre>
void dumpNode( TidyNode tnod, int indent )
{
  TidyNode child;

  for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) )
  {
    ctmbstr name = tidyNodeGetName( child );
    if ( !name )
    {
      switch ( tidyNodeGetType(child) )
      {
      case TidyNode_Root:       name = "Root";                    break;
      case TidyNode_DocType:    name = "DOCTYPE";                 break;
      case TidyNode_Comment:    name = "Comment";                 break;
      case TidyNode_ProcIns:    name = "Processing Instruction";  break;
      case TidyNode_Text:       name = "Text";                    break;
      case TidyNode_CDATA:      name = "CDATA";                   break;
      case TidyNode_Section:    name = "XML Section";             break;
      case TidyNode_Asp:        name = "ASP";                     break;
      case TidyNode_Jste:       name = "JSTE";                    break;
      case TidyNode_Php:        name = "PHP";                     break;
      case TidyNode_XmlDecl:    name = "XML Declaration";         break;

      case TidyNode_Start:
      case TidyNode_End:
      case TidyNode_StartEnd:
      default:
        assert( name != NULL ); // Shouldn't get here
        break;
      }
    }
    assert( name != NULL );
    printf( "%*.*sNode: %s\n", indent, indent, " ", name );
    dumpNode( child, indent + 4 );
  }
}

void dumpDoc( TidyDoc tdoc )
{
  dumpNode( tidyGetRoot(tdoc), 0 );
}

void dumpBody( TidyDoc tdoc )
{
  dumpNode( tidyGetBody(tdoc), 0 );
}
</pre>

@{
*/
TIDY_EXPORT TidyNode tidyGetRoot( TidyDoc tdoc );
Get the root node of the document tree (Document Tree group).
tidyInitSink |
TIDY_EXPORT Bool tidyInitSink( TidyOutputSink*sink, void*snkData, TidyPutByteFunc pbFunc );
Facilitates user defined sinks by providing an entry point to marshal pointers-to-functions. Needed by .NET and possibly other language bindings.
tidyInitSource |
TIDY_EXPORT Bool tidyInitSource( TidyInputSource*source, void*srcData, TidyGetByteFunc gbFunc, TidyUngetByteFunc ugbFunc, TidyEOFFunc endFunc );
Facilitates user defined sources by providing an entry point to marshal pointers-to-functions. Needed by .NET and possibly other language bindings.
tidyIsEOF |
TIDY_EXPORT Bool tidyIsEOF( TidyInputSource*source );
Helper: check if input source at end
tidyLoadConfig |
TIDY_EXPORT int tidyLoadConfig( TidyDoc tdoc, ctmbstr configFile );
Load an ASCII Tidy configuration file
tidyLoadConfigEnc |
TIDY_EXPORT int tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr configFile, ctmbstr charenc );
Load a Tidy configuration file with the specified character encoding
tidyNodeGetType |
/** @defgroup NodeAsk Node Interrogation ** ** Get information about any given node. ** @{ */ /* Node info */ TIDY_EXPORT TidyNodeType tidyNodeGetType( TidyNode tnod );
Get the type of the given node (Node Interrogation group).
tidyOptCopyConfig |
TIDY_EXPORT Bool tidyOptCopyConfig( TidyDoc tdocTo, TidyDoc tdocFrom );
Copy current configuration settings from one document to another
tidyOptDiffThanDefault |
TIDY_EXPORT Bool tidyOptDiffThanDefault( TidyDoc tdoc );
Any settings different than default?
tidyOptDiffThanSnapshot |
TIDY_EXPORT Bool tidyOptDiffThanSnapshot( TidyDoc tdoc );
Any settings different than snapshot?
tidyOptGetBool |
TIDY_EXPORT Bool tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId );
Get current Option value as a Boolean flag
tidyOptGetCategory |
TIDY_EXPORT TidyConfigCategory tidyOptGetCategory( TidyOption opt );
Get category of given Option
tidyOptGetCurrPick |
TIDY_EXPORT ctmbstr tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId);
Get current pick list value for option by ID. Useful for enum types.
tidyOptGetDeclTagList |
TIDY_EXPORT TidyIterator tidyOptGetDeclTagList( TidyDoc tdoc );
Iterate over user declared tags
tidyOptGetDefault |
TIDY_EXPORT ctmbstr tidyOptGetDefault( TidyOption opt );
Get default value of given Option as a string
tidyOptGetDefaultBool |
TIDY_EXPORT Bool tidyOptGetDefaultBool( TidyOption opt );
Get default value of given Option as a Boolean value
tidyOptGetDefaultInt |
TIDY_EXPORT ulong tidyOptGetDefaultInt( TidyOption opt );
Get default value of given Option as an unsigned integer
tidyOptGetEncName |
TIDY_EXPORT ctmbstr tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId );
Get character encoding name. Used with TidyCharEncoding, TidyOutCharEncoding, TidyInCharEncoding.
tidyOptGetId |
TIDY_EXPORT TidyOptionId tidyOptGetId( TidyOption opt );
Get ID of given Option
tidyOptGetIdForName |
TIDY_EXPORT TidyOptionId tidyOptGetIdForName( ctmbstr optnam );
Get option ID by name
tidyOptGetInt |
TIDY_EXPORT ulong tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId );
Get current Option value as an integer
tidyOptGetName |
TIDY_EXPORT ctmbstr tidyOptGetName( TidyOption opt );
Get name of given Option
tidyOptGetNextDeclTag |
TIDY_EXPORT ctmbstr tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId, TidyIterator*iter );
Get next declared tag of specified type: TidyInlineTags, TidyBlockTags, TidyEmptyTags, TidyPreTags
tidyOptGetNextPick |
TIDY_EXPORT ctmbstr tidyOptGetNextPick( TidyOption opt, TidyIterator*pos );
Get next string value of Option "pick list"
tidyOptGetPickList |
TIDY_EXPORT TidyIterator tidyOptGetPickList( TidyOption opt );
Iterate over Option "pick list"
tidyOptGetType |
TIDY_EXPORT TidyOptionType tidyOptGetType( TidyOption opt );
Get datatype of given Option
tidyOptGetValue |
TIDY_EXPORT ctmbstr tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId );
Get current Option value as a string
tidyOptIsReadOnly |
TIDY_EXPORT Bool tidyOptIsReadOnly( TidyOption opt );
Is Option read-only?
tidyOptParseValue |
TIDY_EXPORT Bool tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val );
Set named Option value as a string. Good if not sure of type.
tidyOptResetAllToDefault |
TIDY_EXPORT Bool tidyOptResetAllToDefault( TidyDoc tdoc );
Reset all options to their default values
tidyOptResetToDefault |
TIDY_EXPORT Bool tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId opt );
Reset option to default value by ID
tidyOptResetToSnapshot |
TIDY_EXPORT Bool tidyOptResetToSnapshot( TidyDoc tdoc );
Reset config settings to snapshot (after document processing)
tidyOptSaveFile |
/** @addtogroup Basic ** @{ */ /** Save current settings to named file. */ TIDY_EXPORT int tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil );
Save current settings to named file.
tidyOptSaveSink |
TIDY_EXPORT int tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink*sink );
Save current settings to given output sink. Only non-default values are written.
tidyOptSetBool |
TIDY_EXPORT Bool tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val );
Set Option value as a Boolean flag
tidyOptSetInt |
TIDY_EXPORT Bool tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val );
Set Option value as an integer
tidyOptSetValue |
TIDY_EXPORT Bool tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val );
Set Option value as a string
tidyOptSnapshot |
TIDY_EXPORT Bool tidyOptSnapshot( TidyDoc tdoc );
Take a snapshot of current config settings
tidyParseBuffer |
TIDY_EXPORT int tidyParseBuffer( TidyDoc tdoc, TidyBuffer*buf );
Parse markup in given buffer
tidyParseFile |
/* TODO: Catalog all messages for easy translation TIDY_EXPORT ctmbstr tidyLookupMessage( int errorNo ) */ /** @defgroup Parse Document Parse ** ** Parse markup from a given input source. String and filename ** functions added for convenience. HTML/XHTML version determined ** from input. ** @{ */ /** Parse markup in named file */ TIDY_EXPORT int tidyParseFile( TidyDoc tdoc, ctmbstr filename );
Parse markup in named file
tidyParseSource |
TIDY_EXPORT int tidyParseSource( TidyDoc tdoc, TidyInputSource*source);
Parse markup in given generic input source
tidyParseStdin |
TIDY_EXPORT int tidyParseStdin( TidyDoc tdoc );
Parse markup from the standard input
tidyParseString |
TIDY_EXPORT int tidyParseString( TidyDoc tdoc, ctmbstr content );
Parse markup in given string
tidyPutByte |
TIDY_EXPORT void tidyPutByte( TidyOutputSink*sink, uint byteValue );
Helper: send a byte to output
tidyReleaseDate |
TIDY_EXPORT ctmbstr tidyReleaseDate( void);
Get release date (version) for current library
tidyRunDiagnostics |
TIDY_EXPORT int tidyRunDiagnostics( TidyDoc tdoc );
Run configured diagnostics on parsed and repaired markup. Must call tidyCleanAndRepair() first.
tidySaveBuffer |
TIDY_EXPORT int tidySaveBuffer( TidyDoc tdoc, TidyBuffer*buf );
Save to given TidyBuffer object
tidySaveFile |
/** @defgroup Save Document Save Functions ** ** Save currently parsed document to the given output sink. File name ** and string/buffer functions provided for convenience. ** @{ */ /** Save to named file */ TIDY_EXPORT int tidySaveFile( TidyDoc tdoc, ctmbstr filename );
Save to named file
tidySaveSink |
TIDY_EXPORT int tidySaveSink( TidyDoc tdoc, TidyOutputSink*sink );
Save to given generic output sink
tidySaveStdout |
TIDY_EXPORT int tidySaveStdout( TidyDoc tdoc );
Save to standard output (FILE*)
tidySaveString |
TIDY_EXPORT int tidySaveString( TidyDoc tdoc, tmbstr buffer, uint*buflen );
Save document to application buffer. If buffer is not big enough, ENOMEM will be returned and the necessary buffer size will be placed in *buflen.
tidySetAppData |
TIDY_EXPORT void tidySetAppData( TidyDoc tdoc, ulong appData );
Let application store a chunk of data with each Tidy instance. Useful for callbacks.
tidySetCharEncoding |
TIDY_EXPORT int tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam );
Set the input/output character encoding for parsing markup. Values include: ascii, latin1, raw, utf8, iso2022, mac, win1252, utf16le, utf16be, utf16, big5 and shiftjis. Case-insensitive.
tidySetErrorBuffer |
TIDY_EXPORT int tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer*errbuf );
Set error sink to given buffer
tidySetErrorFile |
TIDY_EXPORT FILE* tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam );
Set error sink to named file
tidySetErrorSink |
TIDY_EXPORT int tidySetErrorSink( TidyDoc tdoc, TidyOutputSink*sink );
Set error sink to given generic sink
tidySetFreeCall |
TIDY_EXPORT Bool tidySetFreeCall( TidyFree ffree );
Give Tidy a free() replacement
tidySetInCharEncoding |
TIDY_EXPORT int tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam );
Set the input encoding for parsing markup. As for tidySetCharEncoding, but only affects the input encoding.
tidySetMallocCall |
TIDY_EXPORT Bool tidySetMallocCall( TidyMalloc fmalloc );
Give Tidy a malloc() replacement
tidySetOutCharEncoding |
TIDY_EXPORT int tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam );
Set the output encoding.
tidySetPanicCall |
TIDY_EXPORT Bool tidySetPanicCall( TidyPanic fpanic );
Give Tidy an "out of memory" handler
tidySetReallocCall |
TIDY_EXPORT Bool tidySetReallocCall( TidyRealloc frealloc );
Give Tidy a realloc() replacement
tidySetReportFilter |
TIDY_EXPORT Bool tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filtCallback );
Give Tidy a filter callback to use
tidyStatus |
TIDY_EXPORT int tidyStatus( TidyDoc tdoc );
Get status of current document.
tidyUngetByte |
TIDY_EXPORT void tidyUngetByte( TidyInputSource*source, uint byteValue );
Helper: unget byte back to input source
tidyWarningCount |
TIDY_EXPORT uint tidyWarningCount( TidyDoc tdoc );
Number of Tidy warnings encountered.
TidyEOFFunc |
typedef Bool (*TidyEOFFunc)( ulong sourceData );
Input Callback: is end of input?
TidyFree |
typedef void (*TidyFree)( void*buf );
Callback for "free" replacement
TidyGetByteFunc |
/** @defgroup IO I/O and Messages ** ** By default, Tidy will define, create and use ** instances of input and output handlers for ** standard C buffered I/O (i.e. FILE* stdin, ** FILE* stdout and FILE* stderr for content ** input, content output and diagnostic output, ** respectively. A FILE* cfgFile input handler ** will be used for config files. Command line ** options will just be set directly. ** ** @{ */ /***************** Input Source */ /** Input Callback: get next byte of input */ typedef int (*TidyGetByteFunc)( ulong sourceData );
Input Callback: get next byte of input
TidyMalloc |
/** @defgroup Memory Memory Allocation ** ** By default, Tidy will use its own wrappers ** around standard C malloc/free calls. ** These wrappers will abort upon any failures. ** If any are set, all must be set. ** Pass NULL to clear previous setting. ** ** May be used to set environment-specific allocators ** such as used by web server plugins, etc. ** ** @{ */ /** Callback for "malloc" replacement */ typedef void* (*TidyMalloc)( size_t len );
Callback for "malloc" replacement
TidyOptCallback |
/** @defgroup Configuration Configuration Options ** ** Functions for getting and setting Tidy configuration options. ** @{ */ /** Applications using TidyLib may want to augment command-line and ** configuration file options. Setting this callback allows an application ** developer to examine command-line and configuration file options after ** TidyLib has examined them and failed to recognize them. */ typedef Bool (*TidyOptCallback)( ctmbstr option, ctmbstr value );
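Callback that lets an application examine command-line and configuration file options after TidyLib has examined them and failed to recognize them.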
TidyPanic |
typedef void (*TidyPanic)( ctmbstr mssg );
Callback for "out of memory" panic state
TidyPutByteFunc |
typedef void (*TidyPutByteFunc)( ulong sinkData, byte bt );
Output callback: send a byte to output
TidyRealloc |
typedef void* (*TidyRealloc)( void*buf, size_t len );
Callback for "realloc" replacement
TidyReportFilter |
typedef Bool (*TidyReportFilter)( TidyDoc tdoc, TidyReportLevel lvl, uint line, uint col, ctmbstr mssg );
Callback to filter messages by diagnostic level: info, warning, etc. Just set diagnostic output handler to redirect all diagnostics output. Return true to proceed with output, false to cancel.
TidyUngetByteFunc |
typedef void (*TidyUngetByteFunc)( ulong sourceData, byte bt );
Input Callback: unget a byte of input
EndOfStream |
#define EndOfStream (~0u)
End of input "character"