/* ************************************************************************ */ /* */ /* Neko Standard Library */ /* Copyright (c)2005 Motion-Twin */ /* */ /* This library is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU Lesser General Public */ /* License as published by the Free Software Foundation; either */ /* version 2.1 of the License, or (at your option) any later version. */ /* */ /* This library is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ /* Lesser General Public License or the LICENSE file for more details. */ /* */ /* ************************************************************************ */ #include #include #ifndef NEKO_WINDOWS # include # undef strcmpi # define strcmpi(a,b) strcasecmp(a,b) #else # include #endif #define ERROR(msg) xml_error(xml,p,line,msg); // -------------- parsing -------------------------- typedef enum { IGNORE_SPACES, BEGIN, BEGIN_NODE, TAG_NAME, BODY, ATTRIB_NAME, EQUALS, ATTVAL_BEGIN, ATTRIB_VAL, CHILDS, CLOSE, WAIT_END, WAIT_END_RET, PCDATA, HEADER, COMMENT, DOCTYPE, CDATA, } STATE; extern field id_pcdata; extern field id_xml; extern field id_done; extern field id_comment; extern field id_cdata; extern field id_doctype; static void xml_error( const char *xml, const char *p, int *line, const char *msg ) { buffer b = alloc_buffer("Xml parse error : "); int l = (int)strlen(p); int nchars = 30; buffer_append(b,msg); buffer_append(b," at line "); val_buffer(b,alloc_int(*line)); buffer_append(b," : "); if( p != xml ) buffer_append(b,"..."); buffer_append_sub(b,p,(l < nchars)?l:nchars); if( l > nchars ) buffer_append(b,"..."); if( l == 0 ) buffer_append(b,""); bfailure(b); } static bool is_valid_char( int c ) { return ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) || ( c >= '0' && c <= '9' ) || c == ':' || c == '.' 
|| c == '_' || c == '-'; } static void do_parse_xml( const char *xml, const char **lp, int *line, value callb, const char *parentname ) { STATE state = BEGIN; STATE next = BEGIN; field aname = (field)0; value attribs = NULL; value nodename = NULL; const char *start = NULL; const char *p = *lp; char c = *p; int nsubs = 0, nbrackets = 0; while( c ) { switch( state ) { case IGNORE_SPACES: switch( c ) { case '\n': case '\r': case '\t': case ' ': break; default: state = next; continue; } break; case BEGIN: switch( c ) { case '<': state = IGNORE_SPACES; next = BEGIN_NODE; break; default: start = p; state = PCDATA; continue; } break; case PCDATA: if( c == '<' ) { val_ocall1(callb,id_pcdata,copy_string(start,p-start)); nsubs++; state = IGNORE_SPACES; next = BEGIN_NODE; } break; case CDATA: if( c == ']' && p[1] == ']' && p[2] == '>' ) { val_ocall1(callb,id_cdata,copy_string(start,p-start)); nsubs++; p += 2; state = BEGIN; } break; case BEGIN_NODE: switch( c ) { case '!': if( p[1] == '[' ) { p += 2; if( (p[0] != 'C' && p[0] != 'c') || (p[1] != 'D' && p[1] != 'd') || (p[2] != 'A' && p[2] != 'a') || (p[3] != 'T' && p[3] != 't') || (p[4] != 'A' && p[4] != 'a') || (p[5] != '[') ) ERROR("Expected ': state = CHILDS; nsubs++; val_ocall2(callb,id_xml,nodename,attribs); break; default: state = ATTRIB_NAME; start = p; continue; } break; case ATTRIB_NAME: if( !is_valid_char(c) ) { value tmp; if( start == p ) ERROR("Expected attribute name"); tmp = copy_string(start,p-start); aname = val_id(val_string(tmp)); if( !val_is_null(val_field(attribs,aname)) ) ERROR("Duplicate attribute"); state = IGNORE_SPACES; next = EQUALS; continue; } break; case EQUALS: switch( c ) { case '=': state = IGNORE_SPACES; next = ATTVAL_BEGIN; break; default: ERROR("Expected ="); } break; case ATTVAL_BEGIN: switch( c ) { case '"': case '\'': state = ATTRIB_VAL; start = p; break; default: ERROR("Expected \""); } break; case ATTRIB_VAL: if( c == *start ) { value aval = copy_string(start+1,p-start-1); 
alloc_field(attribs,aname,aval); state = IGNORE_SPACES; next = BODY; } break; case CHILDS: *lp = p; do_parse_xml(xml,lp,line,callb,val_string(nodename)); p = *lp; start = p; state = BEGIN; break; case WAIT_END: switch( c ) { case '>': val_ocall0(callb,id_done); state = BEGIN; break; default : ERROR("Expected >"); } break; case WAIT_END_RET: switch( c ) { case '>': if( nsubs == 0 ) val_ocall1(callb,id_pcdata,alloc_string("")); val_ocall0(callb,id_done); *lp = p; return; default : ERROR("Expected >"); } break; case CLOSE: if( !is_valid_char(c) ) { if( start == p ) ERROR("Expected node name"); { value v = copy_string(start,p - start); if( strcmpi(parentname,val_string(v)) != 0 ) { buffer b = alloc_buffer("Expected "); ERROR(val_string(buffer_to_string(b))); } } state = IGNORE_SPACES; next = WAIT_END_RET; continue; } break; case COMMENT: if( c == '-' && p[1] == '-' && p[2] == '>' ) { val_ocall1(callb,id_comment,copy_string(start,p-start)); p += 2; state = BEGIN; } break; case DOCTYPE: if( c == '[' ) nbrackets++; else if( c == ']' ) nbrackets--; else if( c == '>' && nbrackets == 0 ) { val_ocall1(callb,id_doctype,copy_string(start,p-start)); state = BEGIN; } break; case HEADER: if( c == '?' && p[1] == '>' ) { p++; val_ocall1(callb,id_comment,copy_string(start,p-start)); state = BEGIN; } break; } c = *++p; if( c == '\n' ) (*line)++; } if( state == BEGIN ) { start = p; state = PCDATA; } if( parentname == NULL && state == PCDATA ) { if( p != start || nsubs == 0 ) val_ocall1(callb,id_pcdata,copy_string(start,p-start)); return; } ERROR("Unexpected end"); } // ---------------------------------------------- /**

Xml

The standard event-driven XML parser.

**/

/**
	parse_xml : xml:string -> events:object -> void
	[parse_xml] parses a string and, for each parsed element, calls the corresponding method of the [events] object:
  • [void xml( name : string, attribs : object)] when an XML node is found
  • [void done()] when an XML node is closed
  • [void pcdata(string)] when PCData chars found
  • [void cdata(string)] when a CDATA section is found
  • [void comment(string)] when some comment or special header is found
You can then implement the events so they build the appropriate XML data structure needed by your language.
**/ static value parse_xml( value str, value callb ) { const char *p; int line = 0; val_check(str,string); val_check(callb,object); p = val_string(str); // skip BOM if( p[0] == (char)0xEF && p[1] == (char)0xBB && p[2] == (char)0xBF ) p += 3; do_parse_xml(p,&p,&line,callb,NULL); return val_true; } DEFINE_PRIM(parse_xml,2); /* ************************************************************************ */