[All Packages] [Previous] [Next]
XML documents are made up of storage units called entities, which contain either parsed or unparsed data. Parsed data is made up of characters, some of which form character data, and some of which form markup. Markup encodes a description of the document's storage layout and logical structure. XML provides a mechanism to impose constraints on the storage layout and logical structure.
A software module called an XML processor is used to read XML documents and provide access to their content and structure. It is assumed that an XML processor is doing its work on behalf of another module, called the application.
This C implementation of the XML processor (or parser) follows the W3C XML specification (rev REC-xml-19980210) and includes the required behavior of an XML processor in terms of how it must read XML data and the information it must provide to the application.
The following is the default behavior of this parser:
The memory callback functions (memcb) may be used if you wish to use your own memory allocation. If they are used, all three functions (alloc, free, and realloc) should be specified.
The memory allocated for parameters passed to the SAX callbacks or for nodes and data stored with the DOM parse tree will not be freed until one of the following is done:
| xmlinit | Initialize XML parser |
| xmlclean | Clean up memory used during parse |
| xmlparse | Parse a file |
| xmlparsebuf | Parse a buffer |
| xmlterm | Shut down XML parser |
| createDocument | Create a new document |
| isStandalone | Return document's standalone flag |
/* Byte type used for the text arguments of this API (names, messages,
   encodings, buffers). */
typedef unsigned char oratext;
/* Parser context handle. Only the struct tag is declared here, so callers
   can refer to it solely through a pointer. */
typedef struct xmlctx xmlctx;
Note: The contents of xmlctx are private and must not be accessed by users.
/*
 * Table of caller-supplied memory-management hooks, passed to xmlinit
 * through its memcb parameter.
 *
 * Every hook takes an opaque context pointer as its first argument
 * (presumably the memcbctx value given to xmlinit -- TODO confirm).
 */
struct xmlmemcb
{
/* Allocation hook: takes a byte count and returns a pointer
   (malloc-like semantics assumed -- TODO confirm). */
void *(*alloc)(void *ctx, size_t size);
/* Release hook: takes a pointer and returns nothing. */
void (*free)(void *ctx, void *ptr);
/* Resize hook: takes an existing pointer and a new byte count and returns
   a pointer (realloc-like semantics assumed -- TODO confirm). */
void *(*realloc)(void *ctx, void *ptr, size_t size);
};
/* Lets callers write `xmlmemcb` without the `struct` keyword. */
typedef struct xmlmemcb xmlmemcb;
Note: This is the memory callback structure.
/*
 * Table of event callbacks for the SAX-like interface, passed to xmlinit
 * through its saxcb parameter.
 *
 * Every callback takes an opaque context pointer as its first argument
 * (presumably the saxcbctx value given to xmlinit -- TODO confirm) and
 * returns a `sword`. Neither `sword` nor `struct xmlattrs` is declared in
 * this listing; both must be in scope before this structure is compiled.
 * NOTE(review): the meaning of the returned `sword` (e.g. zero = continue)
 * is not stated here -- confirm against the implementation.
 */
struct xmlsaxcb
{
/* Document boundary events; no payload beyond the context. */
sword (*startDocument)(void *ctx);
sword (*endDocument)(void *ctx);
/* Element events: element name, plus an attribute set on start. */
sword (*startElement)(void *ctx, const oratext *name,
const struct xmlattrs *attrs);
sword (*endElement)(void *ctx, const oratext *name);
/* Text events: `ch` points at the data and `len` gives its length, so the
   data need not be NUL-terminated (presumably -- TODO confirm). */
sword (*characters)(void *ctx, const oratext *ch, size_t len);
sword (*ignorableWhitespace)(void *ctx, const oratext *ch,
size_t len);
/* Processing instruction: target and data strings. */
sword (*processingInstruction)(void *ctx, const oratext *target,
const oratext *data);
/* Notation declaration: name with public and system identifiers. */
sword (*notationDecl)(void *ctx, const oratext *name,
const oratext *publicId,
const oratext *systemId);
/* Unparsed entity declaration: as notationDecl, plus the notation name. */
sword (*unparsedEntityDecl)(void *ctx, const oratext *name,
const oratext *publicId,
const oratext *systemId,
const oratext *notationName);
/* Namespace-aware variant of startElement: qualified name, local name and
   namespace in place of the single name.
   NOTE(review): the parameter name `namespace` is a C++ keyword, so this
   declaration does not compile as C++. */
sword (*nsStartElement)(void *ctx, const oratext *qname,
const oratext *local,
const oratext *namespace,
const struct xmlattrs *attrs);
/* The following 8 fields are reserved for future use. */
void (*empty1)();
void (*empty2)();
void (*empty3)();
void (*empty4)();
void (*empty5)();
void (*empty6)();
void (*empty7)();
void (*empty8)();
};
/* Lets callers write `xmlsaxcb` without the `struct` keyword. */
typedef struct xmlsaxcb xmlsaxcb;
Note: Callbacks for SAX-like API.
/* Unsigned integer type used for error codes and option flags in this API. */
typedef unsigned int ub4;
/* Unsigned integer type used for the status values returned by the parser
   entry points. */
typedef unsigned int uword;
/*
 * Initialize the XML parser.
 *
 * err      (OUT) - the error, if any
 * encoding (IN)  - default character set encoding
 * msghdlr  (IN)  - error message handler; receives msgctx, the message
 *                  text and an error code
 * msgctx   (IN)  - context for the error message handler
 * saxcb    (IN)  - SAX callback structure filled with function pointers
 * saxcbctx (IN)  - context for the SAX callbacks
 * memcb    (IN)  - memory function callbacks
 * memcbctx (IN)  - context for the memory function callbacks
 * lang     (IN)  - language for error messages
 *
 * Returns a pointer to an xmlctx (presumably the newly created parser
 * context, with failure reported through *err -- TODO confirm).
 */
xmlctx *xmlinit(uword *err, const oratext *encoding,
void (*msghdlr)(void *msgctx, const oratext *msg, ub4 errcode),
void *msgctx, const xmlsaxcb *saxcb, void *saxcbctx,
const xmlmemcb *memcb, void *memcbctx, const oratext *lang);
err (OUT) - The error, if any
encoding (IN) - Default character set encoding
msghdlr (IN) - Error message handler function
msgctx (IN) - Context for the error message handler
saxcb (IN) - SAX callback structure filled with function pointers
saxcbctx (IN) - Context for SAX callbacks
memcb (IN) - Memory function callbacks
memcbctx (IN) - Context for the memory function callbacks
lang (IN) - Language for error messages
/*
 * Clean up memory used during a parse.
 *
 * ctx - the XML parser context
 */
void xmlclean(xmlctx *ctx);
/*
 * Parse a file.
 *
 * ctx      (IN/OUT) - the XML parser context
 * filename (IN)     - path to the XML document
 * encoding (IN)     - default character set encoding
 * flags    (IN)     - what options to use
 *
 * Returns a uword status (presumably an error code with zero meaning
 * success -- TODO confirm).
 */
uword xmlparse(xmlctx *ctx, const oratext *filename, const oratext *encoding, ub4 flags);
ctx (IN/OUT) - The XML parser context
filename (IN) - Path to the XML document
encoding (IN) - Default character set encoding
flags (IN) - What options to use
/*
 * Parse a buffer.
 *
 * ctx      (IN/OUT) - the XML parser context
 * buffer   (IN)     - the data to be parsed
 * len      (IN)     - length of the buffer
 * encoding (IN)     - default character set encoding
 * flags    (IN)     - what options to use
 *
 * Returns a uword status (presumably an error code with zero meaning
 * success -- TODO confirm).
 */
uword xmlparsebuf(xmlctx *ctx, const oratext *buffer, size_t len, const oratext *encoding, ub4 flags);
ctx (IN/OUT) - The XML parser context
buffer (IN) - Buffer containing the XML document to be parsed
len (IN) - Length of the buffer
encoding (IN) - Default character set encoding
flags (IN) - What options to use
/*
 * Shut down the XML parser.
 *
 * ctx (IN) - the XML parser context
 *
 * Returns a uword status (presumably an error code with zero meaning
 * success -- TODO confirm).
 */
uword xmlterm(xmlctx *ctx);
ctx (IN) - the XML parser context
/*
 * Create a new document.
 *
 * ctx (IN) - the XML parser context
 *
 * Returns a pointer to an xmlnode (presumably the new document node --
 * TODO confirm; xmlnode is declared outside this listing).
 */
xmlnode *createDocument(xmlctx *ctx);
ctx (IN) - the XML parser context
/*
 * Return the document's standalone flag.
 *
 * ctx (IN) - the XML parser context
 *
 * The `boolean` return type is declared outside this listing.
 */
boolean isStandalone(xmlctx *ctx);
ctx (IN) - the XML parser context