5#ifndef MHTML_PARSER_TAGS_INIT_SZ 
    6#  define MHTML_PARSER_TAGS_INIT_SZ 10 
   13#ifndef MHTML_DUMP_LINE_SZ 
   14#  define MHTML_DUMP_LINE_SZ 255 
   17#ifndef MHTML_SRC_HREF_SZ_MAX 
   18#  define MHTML_SRC_HREF_SZ_MAX 128 
   21#ifndef MHTML_TRACE_LVL 
   22#  define MHTML_TRACE_LVL 0 
   26#define MHTML_TAG_FLAG_STYLE  0x02 
   28#define MHTML_INPUT_TYPE_BUTTON  0x01 
   33#define MHTML_ATTRIB_TABLE( f ) \ 
   /**
    * \brief X-macro table of the HTML tag types known to the parser.
    *
    * Each row calls f( tag_id, tag_name, fields, disp ): a numeric id, the
    * tag name, the type-specific struct field declarations and a display
    * keyword (NONE, BLOCK or INLINE).
    */
   #define MHTML_TAG_TABLE( f ) \
      f(  0, NONE, void* none;, NONE ) \
      f(  1, BODY, void* none;, BLOCK ) \
      f(  2, DIV, void* none;, BLOCK ) \
      f(  3, HEAD, void* none;, NONE ) \
      f(  4, HTML, void* none;, BLOCK ) \
      f(  5, TEXT, \
         mdata_strpool_idx_t content_idx; size_t content_sz;, INLINE ) \
      f(  6, TITLE, \
         mdata_strpool_idx_t content_idx; size_t content_sz;, NONE ) \
      f(  7, SPAN, void* none;, INLINE ) \
      f(  8, BR, void* none;, BLOCK ) \
      f(  9, STYLE, void* none;, NONE ) \
      f( 10, IMG, \
         char src[MHTML_SRC_HREF_SZ_MAX + 1]; size_t src_sz;, BLOCK ) \
      f( 11, INPUT, \
         uint8_t input_type; \
         char name[MCSS_ID_SZ_MAX + 1]; size_t name_sz; \
         char value[MCSS_ID_SZ_MAX + 1]; size_t value_sz;, INLINE )
   /**
    * \brief X-macro table of parser states.
    *
    * Each row calls f( name, idx ) with the state constant's name and its
    * numeric value.
    */
   #define MHTML_PARSER_PSTATE_TABLE( f ) \
      f( MHTML_PSTATE_NONE, 0 ) \
      f( MHTML_PSTATE_ELEMENT, 1 ) \
      f( MHTML_PSTATE_ATTRIB_KEY, 2 ) \
      f( MHTML_PSTATE_ATTRIB_VAL, 3 ) \
      f( MHTML_PSTATE_END_ELEMENT, 4 ) \
      f( MHTML_PSTATE_STRING, 5 ) \
      f( MHTML_PSTATE_STYLE, 6 )
   /**
    * \brief Get a pointer to the tag at index idx in the parser's tags.
    *
    * NOTE(review): this indexes (parser)->tags directly, while the functions
    * in this file fetch tags with mdata_vector_get() -- confirm this macro is
    * still valid for the current type of tags.
    */
   #define mhtml_tag( parser, idx ) (&((parser)->tags[idx]))
   /**
    * \brief Get a pointer to the parent of the tag at idx, or NULL if that
    *        tag's parent index is negative.
    *
    * NOTE(review): this reads the link as tags[idx].parent, while the
    * functions in this file use mdata_vector_get() and ->base.parent --
    * confirm the member path matches the current tag layout.
    */
   #define mhtml_tag_parent( parser, idx ) \
      (0 <= (parser)->tags[idx].parent ? \
         (&((parser)->tags[(parser)->tags[idx].parent])) : NULL)
   /**
    * \brief Get a pointer to the first child of the tag at idx, or NULL if
    *        that tag's first_child index is negative.
    *
    * NOTE(review): this reads the link as tags[idx].first_child, while the
    * functions in this file use mdata_vector_get() and ->base.first_child --
    * confirm the member path matches the current tag layout.
    */
   #define mhtml_tag_child( parser, idx ) \
      (0 <= (parser)->tags[idx].first_child ? \
         (&((parser)->tags[(parser)->tags[idx].first_child])) : NULL)
   /**
    * \brief Get a pointer to the next sibling of the tag at idx, or NULL if
    *        that tag's next_sibling index is negative.
    *
    * NOTE(review): this reads the link as tags[idx].next_sibling, while the
    * functions in this file use mdata_vector_get() and ->base.next_sibling --
    * confirm the member path matches the current tag layout.
    */
   #define mhtml_tag_sibling( parser, idx ) \
      (0 <= (parser)->tags[idx].next_sibling ? \
         (&((parser)->tags[(parser)->tags[idx].next_sibling])) : NULL)
   /**
    * \brief Evaluate mparser_pstate() on the base parser embedded in parser.
    *
    * The result is what this file compares against the MHTML_PSTATE_*
    * constants.
    */
   #define mhtml_parser_pstate( parser ) \
      mparser_pstate( &((parser)->base) )
   87#ifdef MPARSER_TRACE_NAMES 
   88#  define mhtml_parser_pstate_push( parser, new_pstate ) \ 
   89      mparser_pstate_push( \ 
   90         "mhtml", &((parser)->base), new_pstate, gc_mhtml_pstate_names )
 
   92#  define mhtml_parser_pstate_pop( parser ) \ 
   94         "mhtml", &((parser)->base), gc_mhtml_pstate_names )
 
   96#  define mhtml_parser_pstate_push( parser, new_pstate ) \ 
   97      mparser_pstate_push( "mhtml", &((parser)->base), new_pstate )
 
   99#  define mhtml_parser_pstate_pop( parser ) \ 
  100      mparser_pstate_pop( "mhtml", &((parser)->base) )
 
  /**
   * \brief Forward character c and the caller's retval variable to
   *        mparser_invalid_c() for this parser's embedded base parser.
   *
   * NOTE(review): the parser name is passed as the bare token mhtml here,
   * while the sibling wrappers pass the string "mhtml" -- presumably
   * mparser_invalid_c() pastes or stringizes it; confirm.
   */
  #define mhtml_parser_invalid_c( parser, c, retval ) \
     mparser_invalid_c( mhtml, &((parser)->base), c, retval )
  /**
   * \brief Call mparser_reset_token() on this parser's embedded base parser,
   *        tagged with the name "mhtml".
   */
  #define mhtml_parser_reset_token( parser ) \
     mparser_reset_token( "mhtml", &((parser)->base) )
 
  /**
   * \brief Call mparser_append_token() with character c on this parser's
   *        embedded base parser, tagged with the name "mhtml".
   *
   * Callers in this file assign the result to retval and check it.
   */
  #define mhtml_parser_append_token( parser, c ) \
     mparser_append_token( "mhtml", &((parser)->base), c )
 
  /**
   * \brief Log the old and new values, then set the parser's tag_iter to
   *        iter.
   *
   * Wrapped in do { } while( 0 ) so the trace call and the assignment act as
   * one statement, e.g. as the body of an unbraced if/else. Invoke it as a
   * statement followed by a semicolon. Note that iter appears twice in the
   * expansion, so avoid arguments with side effects.
   */
  #define mhtml_parser_set_tag_iter( parser, iter ) \
     do { \
        debug_printf( MHTML_TRACE_LVL, "setting tag_iter to: " SSIZE_T_FMT \
           " (previously: " SSIZE_T_FMT ")", \
           (ssize_t)(iter), (parser)->tag_iter ); \
        (parser)->tag_iter = (iter); \
     } while( 0 )
 
  /**
   * \brief Evaluates to non-zero when the parser's tags member is not NULL.
   *
   * NOTE(review): the functions in this file test locking with
   * mdata_vector_is_locked( &(parser->tags) ) instead -- confirm this macro
   * is still applicable to the current type of tags.
   */
  #define mhtml_parser_is_locked( parser ) (NULL != (parser)->tags)
  124   ssize_t next_sibling;
 
  127   char classes[MCSS_CLASS_SZ_MAX + 1];
 
  130   char id[MCSS_ID_SZ_MAX + 1];
 
  134#define MHTML_TAG_TABLE_STRUCT( tag_id, tag_name, fields, disp ) \ 
  135   struct MHTML_TAG_ ## tag_name { \ 
  136      struct MHTML_TAG_BASE base; \ 
  140MHTML_TAG_TABLE( MHTML_TAG_TABLE_STRUCT )
 
  /**
   * \brief MHTML_TAG_TABLE row handler: declares one member of type
   *        struct MHTML_TAG_<tag_name>, named after the tag itself.
   */
  #define MHTML_TAG_TABLE_UNION_FIELD( tag_id, tag_name, fields, disp ) \
     struct MHTML_TAG_ ## tag_name tag_name;
  147   MHTML_TAG_TABLE( MHTML_TAG_TABLE_UNION_FIELD )
 
  159   struct MCSS_PARSER styler;
 
  /**
   * \brief MHTML_PARSER_PSTATE_TABLE row handler: defines a uint8_t constant
   *        called name with the value idx.
   */
  #define MHTML_PSTATE_TABLE_CONST( name, idx ) \
     MAUG_CONST uint8_t SEG_MCONST name = idx;
  181MHTML_PARSER_PSTATE_TABLE( MHTML_PSTATE_TABLE_CONST )
 
  183MPARSER_PSTATE_NAMES( MHTML_PARSER_PSTATE_TABLE, mhtml )
 
  /**
   * \brief MHTML_TAG_TABLE row handler: defines the uint16_t constant
   *        MHTML_TAG_TYPE_<tag_name> with the value tag_id.
   */
  #define MHTML_TAG_TABLE_CONST( tag_id, tag_name, fields, disp ) \
     MAUG_CONST uint16_t SEG_MCONST MHTML_TAG_TYPE_ ## tag_name = tag_id;
  188MHTML_TAG_TABLE( MHTML_TAG_TABLE_CONST )
 
  190#define MHTML_TAG_TABLE_NAMES( tag_id, tag_name, fields, disp ) \ 
  193MAUG_CONST 
char* SEG_MCONST gc_mhtml_tag_names[] = {
 
  194   MHTML_TAG_TABLE( MHTML_TAG_TABLE_NAMES )
 
  198#define MHTML_ATTRIB_TABLE_NAME( name, idx ) \ 
  201static MAUG_CONST 
char* SEG_MCONST gc_mhtml_attrib_names[] = {
 
  202   MHTML_ATTRIB_TABLE( MHTML_ATTRIB_TABLE_NAME )
 
  /**
   * \brief MHTML_ATTRIB_TABLE row handler: defines the uint16_t constant
   *        MHTML_ATTRIB_KEY_<attrib_name> with the value attrib_id.
   */
  #define MHTML_ATTRIB_TABLE_NAME_CONST( attrib_name, attrib_id ) \
     MAUG_CONST uint16_t SEG_MCONST MHTML_ATTRIB_KEY_ ## attrib_name = \
        attrib_id;
  209MHTML_ATTRIB_TABLE( MHTML_ATTRIB_TABLE_NAME_CONST )
 
  215   debug_printf( MHTML_TRACE_LVL, 
"freeing HTML parser..." );
 
  217   mdata_strpool_free( &(parser->strpool) );
 
  222      tag_iter = mdata_vector_get( &(parser->tags), 0, 
union MHTML_TAG );
 
  223      assert( NULL != tag_iter );
 
  232   mcss_parser_free( &(parser->styler) );
 
  234   if( mdata_vector_is_locked( &(parser->tags) ) ) {
 
  238   mdata_vector_free( &(parser->tags) );
 
  248   assert( parser->tag_iter >= 0 );
 
  250   tag_iter = mdata_vector_get(
 
  251      &(parser->tags), parser->tag_iter, 
union MHTML_TAG );
 
  252   assert( NULL != tag_iter );
 
  254   mhtml_parser_set_tag_iter( parser, tag_iter->base.parent );
 
  256   if( 0 <= parser->tag_iter ) {
 
  257      debug_printf( MHTML_TRACE_LVL,
 
  258         "moved iter back to tag %s (" SIZE_T_FMT 
")",
 
  259         gc_mhtml_tag_names[tag_iter->base.type], parser->tag_iter );
 
  261      debug_printf( MHTML_TRACE_LVL, 
"moved iter back to root (-1)" );
 
  273   ssize_t new_tag_idx = -1;
 
  274   ssize_t next_sibling_idx = -1;
 
  279   maug_mzero( &tag_new, 
sizeof( 
union MHTML_TAG ) );
 
  280   tag_new.base.parent = -1;
 
  281   tag_new.base.first_child = -1;
 
  282   tag_new.base.next_sibling = -1;
 
  283   tag_new.base.style = -1;
 
  288      &(parser->tags), &tag_new, 
sizeof( 
union MHTML_TAG ) );
 
  289   if( 0 > new_tag_idx ) {
 
  290      retval = mdata_retval( new_tag_idx );
 
  295   p_tag_new = mdata_vector_get(
 
  296      &(parser->tags), new_tag_idx, 
union MHTML_TAG );
 
  297   assert( NULL != p_tag_new );
 
  299   if( 0 > parser->tag_iter ) {
 
  300      mhtml_parser_set_tag_iter( parser, new_tag_idx );
 
  305   p_tag_iter = mdata_vector_get(
 
  306      &(parser->tags), parser->tag_iter, 
union MHTML_TAG );
 
  307   assert( NULL != p_tag_iter );
 
  310   p_tag_new->base.parent = parser->tag_iter;
 
  313   if( 0 > p_tag_iter->base.first_child ) {
 
  314      debug_printf( MHTML_TRACE_LVL,
 
  315         "zxzx attached " SSIZE_T_FMT 
" as first child to  " 
  316         SSIZE_T_FMT, new_tag_idx, parser->tag_iter );
 
  317      p_tag_iter->base.first_child = new_tag_idx;
 
  320      next_sibling_idx = p_tag_iter->base.first_child;
 
  321      p_tag_iter = mdata_vector_get(
 
  322         &(parser->tags), next_sibling_idx, 
union MHTML_TAG );
 
  323      while( NULL != p_tag_iter && 0 <= p_tag_iter->base.next_sibling ) {
 
  324         next_sibling_idx = p_tag_iter->base.next_sibling;
 
  325         p_tag_iter = mdata_vector_get(
 
  326            &(parser->tags), next_sibling_idx, 
union MHTML_TAG );
 
  328      assert( NULL != p_tag_iter );
 
  329      p_tag_iter->base.next_sibling = new_tag_idx;
 
  330      debug_printf( MHTML_TRACE_LVL,
 
  331         "attached " SSIZE_T_FMT 
" as next sibling to  " 
  332         SSIZE_T_FMT, new_tag_idx, next_sibling_idx );
 
  335   debug_printf( MHTML_TRACE_LVL,
 
  336      "pushed new tag " SSIZE_T_FMT 
" under " SSIZE_T_FMT,
 
  337      new_tag_idx, p_tag_new->base.parent );
 
  339   mhtml_parser_set_tag_iter( parser, new_tag_idx );
 
  353   mparser_token_upper( &((parser)->base), i );
 
  355   if( 0 == strncmp( 
"STYLE", parser->base.token, 6 ) ) {
 
  360      parser->
tag_flags |= MHTML_TAG_FLAG_STYLE;
 
  364   retval = mhtml_push_tag( parser );
 
  365   maug_cleanup_if_not_ok();
 
  369   p_tag_iter = mdata_vector_get(
 
  370      &(parser->tags), parser->tag_iter, 
union MHTML_TAG );
 
  371   assert( NULL != p_tag_iter );
 
  375   while( 
'\0' != gc_mhtml_tag_names[i][0] ) {
 
  377         parser->base.token_sz == maug_strlen( gc_mhtml_tag_names[i] ) &&
 
  379            gc_mhtml_tag_names[i], parser->base.token, parser->base.token_sz )
 
  381         debug_printf( MHTML_TRACE_LVL,
 
  382            "new tag (" SSIZE_T_FMT 
") type: %s",
 
  383            parser->tag_iter, gc_mhtml_tag_names[i] );
 
  384         p_tag_iter->base.type = i;
 
  386         if( MHTML_TAG_TYPE_BODY == i ) {
 
  390            assert( -1 == parser->body_idx );
 
  391            parser->body_idx = parser->tag_iter;
 
  392            debug_printf( MHTML_TRACE_LVL,
 
  393               "set body index to: " SSIZE_T_FMT,
 
  402   error_printf( 
"could not find type for new tag (" SSIZE_T_FMT 
")",
 
  407   if( mdata_vector_is_locked( &(parser->tags) ) ) {
 
  419   retval = mhtml_push_tag( parser );
 
  420   maug_cleanup_if_not_ok();
 
  424   p_tag_iter = mdata_vector_get(
 
  425      &(parser->tags), parser->tag_iter, 
union MHTML_TAG );
 
  426   assert( NULL != p_tag_iter );
 
  429      MHTML_TAG_FLAG_STYLE == (MHTML_TAG_FLAG_STYLE & 
 
  430         p_tag_iter->base.flags)
 
  432      p_tag_iter->base.type = MHTML_TAG_TYPE_STYLE;
 
  434      p_tag_iter->base.type = MHTML_TAG_TYPE_TEXT;
 
  437   if( MHTML_TAG_TYPE_STYLE == p_tag_iter->base.type ) {
 
  439      debug_printf( MHTML_TRACE_LVL, 
"parsing STYLE tag..." );
 
  440      for( ; parser->base.token_sz > i ; i++ ) {
 
  441         retval = mcss_parse_c( &(parser->styler), parser->base.token[i] );
 
  442         maug_cleanup_if_not_ok();
 
  444      debug_printf( 1, 
"out of style characters..." );
 
  445      mcss_parser_flush( &(parser->styler) );
 
  446      mcss_parser_reset( &(parser->styler) );
 
  449      while( 
' ' == parser->base.token[parser->base.token_sz - 1] ) {
 
  450         parser->base.token_sz--;
 
  454      p_tag_iter->TEXT.content_idx = mdata_strpool_append(
 
  455         &(parser->strpool), parser->base.token, parser->base.token_sz,
 
  456         MDATA_STRPOOL_FLAG_DEDUPE );
 
  458         p_tag_iter->TEXT.content_idx, 0, SIZE_T_FMT, MERROR_ALLOC );
 
  459      p_tag_iter->TEXT.content_sz = parser->base.token_sz;
 
  462   debug_printf( 1, 
"done processing tag contents..." );
 
  466   if( mdata_vector_is_locked( &(parser->tags) ) ) {
 
  477   debug_printf( MHTML_TRACE_LVL, 
"attrib: %s", parser->base.token );
 
  479   mparser_token_upper( &((parser)->base), i );
 
  483   while( 
'\0' != gc_mhtml_attrib_names[i][0] ) {
 
  485         parser->base.token_sz == maug_strlen( gc_mhtml_attrib_names[i] ) &&
 
  487            gc_mhtml_attrib_names[i], parser->base.token, parser->base.token_sz )
 
  490            MHTML_TRACE_LVL, 
"new attrib type: %s", gc_mhtml_attrib_names[i] );
 
  491         parser->attrib_key = i;
 
  497   error_printf( 
"unknown attrib: %s", parser->base.token );
 
  511   p_tag_iter = mdata_vector_get(
 
  512      &(parser->tags), parser->tag_iter, 
union MHTML_TAG );
 
  513   assert( NULL != p_tag_iter );
 
  515   if( MHTML_ATTRIB_KEY_STYLE == parser->attrib_key ) {
 
  516      debug_printf( MHTML_TRACE_LVL, 
"style: %s", parser->base.token );
 
  521      retval = mcss_push_style( &(parser->styler), MCSS_SELECT_NONE, NULL, 0 );
 
  522      maug_cleanup_if_not_ok();
 
  526      p_tag_iter->base.style =
 
  529      for( ; parser->base.token_sz > i ; i++ ) {
 
  530         retval = mcss_parse_c( &(parser->styler), parser->base.token[i] );
 
  531         maug_cleanup_if_not_ok();
 
  534      debug_printf( 1, 
"out of style characters..." );
 
  535      mcss_parser_flush( &(parser->styler) );
 
  539   } 
else if( MHTML_ATTRIB_KEY_CLASS == parser->attrib_key ) {
 
  541         p_tag_iter->base.classes,
 
  544      p_tag_iter->base.classes_sz = parser->base.token_sz;
 
  546   } 
else if( MHTML_ATTRIB_KEY_ID == parser->attrib_key ) {
 
  551      p_tag_iter->base.id_sz = parser->base.token_sz;
 
  553   } 
else if( MHTML_ATTRIB_KEY_SRC == parser->attrib_key ) {
 
  558         MHTML_SRC_HREF_SZ_MAX );
 
  559      p_tag_iter->IMG.src_sz = parser->base.token_sz;
 
  561   } 
else if( MHTML_ATTRIB_KEY_TYPE == parser->attrib_key ) {
 
  564      maug_strncpy( parser->base.token, 
"button", 7 );
 
  566      p_tag_iter->INPUT.input_type =
 
  567         MHTML_INPUT_TYPE_BUTTON;
 
  569   } 
else if( MHTML_ATTRIB_KEY_NAME == parser->attrib_key ) {
 
  572         p_tag_iter->INPUT.name,
 
  575      p_tag_iter->INPUT.name_sz = parser->base.token_sz;
 
  577   } 
else if( MHTML_ATTRIB_KEY_VALUE == parser->attrib_key ) {
 
  580         p_tag_iter->INPUT.value,
 
  583      p_tag_iter->INPUT.value_sz = parser->base.token_sz;
 
  588   if( mdata_vector_is_locked( &(parser->tags) ) ) {
 
  598   size_t tag_iter_type = 0;
 
  602      if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
 
  603         if( 0 < parser->base.token_sz ) {
 
  604            retval = mhtml_push_text_tag( parser );
 
  605            maug_cleanup_if_not_ok();
 
  609            p_tag_iter = mdata_vector_get(
 
  610               &(parser->tags), parser->tag_iter, 
union MHTML_TAG );
 
  611            assert( NULL != p_tag_iter );
 
  612            tag_iter_type = p_tag_iter->base.type;
 
  620               MHTML_TAG_TYPE_STYLE != tag_iter_type
 
  623               retval = mhtml_pop_tag( parser );
 
  624               maug_cleanup_if_not_ok();
 
  627         retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ELEMENT );
 
  628         maug_cleanup_if_not_ok();
 
  629         mhtml_parser_reset_token( parser );
 
  632         mhtml_parser_invalid_c( parser, c, retval );
 
  637      if( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) ) {
 
  638         retval = mhtml_push_element_tag( parser );
 
  639         maug_cleanup_if_not_ok();
 
  640         mhtml_parser_pstate_pop( parser );
 
  641         mhtml_parser_reset_token( parser );
 
  643      } 
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
 
  644         mhtml_parser_pstate_pop( parser );
 
  645         assert( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) );
 
  646         mhtml_parser_pstate_pop( parser ); 
 
  647         mhtml_parser_reset_token( parser );
 
  649      } 
else if( MHTML_PSTATE_END_ELEMENT == mhtml_parser_pstate( parser ) ) {
 
  651         retval = mhtml_pop_tag( parser );
 
  652         maug_cleanup_if_not_ok();
 
  654         mhtml_parser_pstate_pop( parser );
 
  655         if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
 
  656            mhtml_parser_pstate_pop( parser );
 
  658         assert( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) );
 
  659         mhtml_parser_pstate_pop( parser ); 
 
  660         mhtml_parser_reset_token( parser );
 
  662      } 
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
 
  663         retval = mhtml_parser_append_token( parser, c );
 
  664         maug_cleanup_if_not_ok();
 
  666      } 
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
 
  667         retval = mhtml_parser_append_token( parser, c );
 
  668         maug_cleanup_if_not_ok();
 
  671         mhtml_parser_invalid_c( parser, c, retval );
 
  677         MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) &&
 
  678         0 == parser->base.token_sz
 
  681         retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_END_ELEMENT );
 
  682         maug_cleanup_if_not_ok();
 
  684      } 
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
 
  686         retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_END_ELEMENT );
 
  687         maug_cleanup_if_not_ok();
 
  689      } 
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
 
  690         retval = mhtml_parser_append_token( parser, c );
 
  691         maug_cleanup_if_not_ok();
 
  693      } 
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
 
  694         retval = mhtml_parser_append_token( parser, c );
 
  695         maug_cleanup_if_not_ok();
 
  698         mhtml_parser_invalid_c( parser, c, retval );
 
  703      if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
 
  704         retval = mhtml_push_attrib_key( parser );
 
  705         maug_cleanup_if_not_ok();
 
  706         retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ATTRIB_VAL );
 
  707         maug_cleanup_if_not_ok();
 
  708         mhtml_parser_reset_token( parser );
 
  710      } 
else if( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) ) {
 
  711         retval = mhtml_parser_append_token( parser, c );
 
  712         maug_cleanup_if_not_ok();
 
  714      } 
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
 
  715         retval = mhtml_parser_append_token( parser, c );
 
  716         maug_cleanup_if_not_ok();
 
  719         mhtml_parser_invalid_c( parser, 
'_', retval );
 
  724      if( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) ) {
 
  725         retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_STRING );
 
  726         maug_cleanup_if_not_ok();
 
  727         mhtml_parser_reset_token( parser );
 
  729      } 
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
 
  730         retval = _mhtml_set_attrib_val( parser );
 
  731         maug_cleanup_if_not_ok();
 
  732         mhtml_parser_pstate_pop( parser );
 
  733         assert( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) );
 
  734         mhtml_parser_pstate_pop( parser );
 
  735         mhtml_parser_reset_token( parser );
 
  737      } 
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
 
  738         retval = mhtml_parser_append_token( parser, c );
 
  739         maug_cleanup_if_not_ok();
 
  742         mhtml_parser_invalid_c( parser, 
'_', retval );
 
  752      if( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) ) {
 
  753         retval = mhtml_push_element_tag( parser );
 
  754         maug_cleanup_if_not_ok();
 
  755         retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ATTRIB_KEY );
 
  756         maug_cleanup_if_not_ok();
 
  757         mhtml_parser_reset_token( parser );
 
  759      } 
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
 
  760         retval = mhtml_parser_append_token( parser, c );
 
  761         maug_cleanup_if_not_ok();
 
  763      } 
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
 
  766      } 
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
 
  769            0 < parser->base.token_sz &&
 
  770            ' ' != parser->base.token[parser->base.token_sz - 1]
 
  772            retval = mhtml_parser_append_token( parser, 
' ' );
 
  773            maug_cleanup_if_not_ok();
 
  777         mhtml_parser_invalid_c( parser, 
'_', retval );
 
  782      retval = mhtml_parser_append_token( parser, c );
 
  783      maug_cleanup_if_not_ok();
 
  789   mparser_wait( &((parser)->base) );
 
  793   parser->base.last_c = c;
 
  795   if( mdata_vector_is_locked( &(parser->tags) ) ) {
 
  806   mhtml_parser_set_tag_iter( parser, -1 );
 
  807   parser->body_idx = -1;
 
  809   retval = mcss_parser_init( &(parser->styler) );
 
  810   maug_cleanup_if_not_ok();
 
  821   char dump_line[MHTML_DUMP_LINE_SZ + 1];
 
  823   ssize_t first_child = -1;
 
  824   ssize_t next_sibling = -1;
 
  826   char* tag_contents = NULL;
 
  834   p_tag_iter = mdata_vector_get( &(parser->tags), iter, 
union MHTML_TAG );
 
  835   assert( NULL != p_tag_iter );
 
  837   maug_mzero( dump_line, MHTML_DUMP_LINE_SZ + 1 );
 
  839   for( i = 0 ; d > i ; i++ ) {
 
  840      assert( i < MHTML_DUMP_LINE_SZ );
 
  841      strcat( dump_line, 
" " );
 
  843   if( MHTML_TAG_TYPE_TEXT == p_tag_iter->base.type ) {
 
  844      if( -1 == p_tag_iter->TEXT.content_idx ) {
 
  845         error_printf( 
"no tag content present!" );
 
  849      mdata_strpool_lock( &(parser->strpool) );
 
  852         maug_strlen( dump_line ) + 7 
 
  853            + p_tag_iter->TEXT.content_sz < MHTML_DUMP_LINE_SZ
 
  855         strcat( dump_line, 
"TEXT: " );
 
  856         tag_contents = mdata_strpool_get(
 
  857            &(parser->strpool), p_tag_iter->TEXT.content_idx );
 
  858         if( NULL == tag_contents ) {
 
  859            error_printf( 
"could not retrieve tag contents!" );
 
  860            retval = MERROR_ALLOC;
 
  863         strcat( dump_line, tag_contents );
 
  864         strcat( dump_line, 
"\n" );
 
  867      mdata_strpool_unlock( &(parser->strpool) );
 
  871         maug_strlen( dump_line ) +
 
  872         maug_strlen( gc_mhtml_tag_names[p_tag_iter->base.type] ) <
 
  876            gc_mhtml_tag_names[p_tag_iter->base.type] );
 
  880         0 <= p_tag_iter->base.style &&
 
  881         maug_strlen( dump_line ) + 9  < MHTML_DUMP_LINE_SZ
 
  883         strcat( dump_line, 
" (styled)" );
 
  887         0 < p_tag_iter->base.id_sz &&
 
  888         maug_strlen( dump_line ) + 7 
 
  889            + maug_strlen( p_tag_iter->base.id ) < MHTML_DUMP_LINE_SZ
 
  891         maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
 
  892            MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
 
  893            " (id: %s)", p_tag_iter->base.id );
 
  897         0 < p_tag_iter->base.classes_sz &&
 
  898         maug_strlen( dump_line ) + 12 
 
  899            + maug_strlen( p_tag_iter->base.id ) < MHTML_DUMP_LINE_SZ
 
  901         maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
 
  902            MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
 
  903            " (classes: %s)", p_tag_iter->base.classes );
 
  907         MHTML_TAG_TYPE_IMG == p_tag_iter->base.type &&
 
  908         0 < p_tag_iter->IMG.src_sz &&
 
  909         maug_strlen( dump_line ) + 8 
 
  910            + maug_strlen( p_tag_iter->IMG.src ) < MHTML_DUMP_LINE_SZ
 
  912         maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
 
  913            MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
 
  914            " (src: %s)", p_tag_iter->IMG.src );
 
  918         MHTML_TAG_TYPE_INPUT == p_tag_iter->base.type &&
 
  919         0 < p_tag_iter->INPUT.value_sz &&
 
  920         maug_strlen( dump_line ) + 10 
 
  921            + maug_strlen( p_tag_iter->INPUT.value ) < MHTML_DUMP_LINE_SZ
 
  923         maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
 
  924            MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
 
  925            " (value: %s)", p_tag_iter->INPUT.value );
 
  930   debug_printf( 1, 
"%s", dump_line );
 
  932   first_child = p_tag_iter->base.first_child;
 
  933   next_sibling = p_tag_iter->base.next_sibling;
 
  937   retval = mhtml_dump_tree( parser, first_child, d + 1 );
 
  938   maug_cleanup_if_not_ok();
 
  940   retval = mhtml_dump_tree( parser, next_sibling, d );
 
  941   maug_cleanup_if_not_ok();
 
  945   if( mdata_vector_is_locked( &(parser->tags) ) ) {
 
  /**
   * \brief MHTML_TAG_TABLE row handler: declares (extern, without defining)
   *        the uint16_t constant MHTML_TAG_TYPE_<tag_name>.
   */
  #define MHTML_TAG_TABLE_CONST( tag_id, tag_name, fields, disp ) \
     extern MAUG_CONST uint16_t SEG_MCONST MHTML_TAG_TYPE_ ## tag_name;
  957MHTML_TAG_TABLE( MHTML_TAG_TABLE_CONST )
 
  959extern MAUG_CONST 
char* SEG_MCONST gc_mhtml_tag_names[];
 
uint16_t MERROR_RETVAL
Return type indicating function returns a value from this list.
Definition: merror.h:19
ssize_t mdata_vector_append(struct MDATA_VECTOR *v, const void *item, size_t item_sz)
Append an item to the specified vector.
MERROR_RETVAL mdata_vector_remove(struct MDATA_VECTOR *v, size_t idx)
Remove item at the given index, shifting subsequent items up by 1.
A pool of immutable text strings. Deduplicates strings to save memory.
Definition: mdata.h:68
A vector of uniformly-sized objects, stored contiguously.
Definition: mdata.h:93
#define mdata_vector_lock(v)
Lock the vector. This should be done when items from the vector are actively being referenced,...
Definition: mdata.h:320
#define mdata_vector_unlock(v)
Unlock the vector so items may be added and removed.
Definition: mdata.h:353
#define mdata_vector_ct(v)
Number of items of MDATA_VECTOR::item_sz bytes actively stored in this vector.
Definition: mdata.h:396
uint8_t tag_flags
Flags to be pushed to MHTML_TAG_BASE::flags on next mhtml_push_tag().
Definition: mhtml.h:158