001 /* 002 * To change this template, choose Tools | Templates 003 * and open the template in the editor. 004 */ 005 006 package org.util.xml.parse; 007 008 import java.io.PrintWriter; 009 import java.io.StringWriter; 010 import java.io.IOException; 011 import org.util.xml.parse.policy.ParserPolicy; 012 import org.util.xml.element.Attribute; 013 import org.util.xml.element.TagElement; 014 import org.util.xml.element.TextElement; 015 016 /** 017 * 018 * @author masaru 019 */ 020 public class ElementPartParser extends ParseElement { 021 022 // parsers 023 protected NameParser name_parser_; 024 protected SpaceParser space_parser_; 025 protected AttributeParser attribute_parser_; 026 protected TextElementParser text_element_parser_; 027 protected CommentInnerTagElementParser comment_inner_tag_element_parser_; 028 029 protected ParserPolicy policy_; 030 031 protected boolean is_novalue_occuered_; 032 protected boolean is_error_; 033 034 protected TagElement tag_element_; 035 protected TextElement text_element_; 036 protected String endtag_name_; 037 protected boolean is_start_tag_; 038 protected StringBuffer error_text_; 039 040 public ElementPartParser() { 041 this(null); 042 } 043 044 public ElementPartParser(ParserPolicy policy) { 045 policy_ = policy; 046 047 name_parser_ = new NameParser(); 048 space_parser_ = new SpaceParser(); 049 attribute_parser_ = new AttributeParser(); 050 text_element_parser_ = new TextElementParser(); 051 comment_inner_tag_element_parser_ = new CommentInnerTagElementParser(); 052 error_text_ = new StringBuffer(); 053 } 054 055 @Override 056 public boolean match(char c) { 057 return c=='<'; 058 } 059 060 @Override 061 public int parse(int c, ElementParser parser) throws XMLParseException, IOException { 062 063 is_error_ = false; 064 is_start_tag_ = false; 065 endtag_name_ = null; 066 text_element_ = null; 067 tag_element_ = null; 068 is_novalue_occuered_ = false; 069 070 int next_word_ = -1; 071 int state = 0; 072 try{ 073 074 while(true) { 075 076 //System.out.println("state:"+state+"|"+(char)c); 077 if(state == 0) { 078 if(c=='<') state = 2; 079 else if(isSpace(c)) ; 080 else if(text_element_parser_.match((char)c)){ 081 c = text_element_parser_.parse(c, parser); 082 text_element_ = new TextElement(text_element_parser_.getReturnValue()); 083 break; 084 } else escape(parser, "parse error: cannot read tag: state=0 ???"); 085 }else if(state == 1) { 086 if(c=='<') state = 2; 087 else throw new XMLParseException("parse error: cannot read tag: state=1 ???"); 088 }else if(state == 2) { 089 if((c=='/')) state = 6; 090 else if(c=='?') { 091 state = 9; 092 } else if(comment_inner_tag_element_parser_.match((char)c)) { 093 c = comment_inner_tag_element_parser_.parse(c, parser); 094 text_element_ = comment_inner_tag_element_parser_.getResult(); 095 break; 096 } else if(name_parser_.match((char)c)) { 097 c = name_parser_.parse(c, parser); 098 String key = name_parser_.getReturnValue(); 099 tag_element_ = new TagElement(key); 100 if(policy_ != null && policy_.forceEmptyTag(key)) 101 tag_element_.setEmpty(true); 102 else 103 is_start_tag_ = true; 104 state = 10; 105 continue; 106 } else throw new XMLParseException("parse error: cannot read tag: this charactar is not allowed at start of tag ("+(char)c+")"); 107 }else if(state == 3) { 108 next_word_ = c; 109 break; 110 } else if(state == 4) { 111 tag_element_.setEmpty(true); 112 is_start_tag_ = false; 113 if(c=='>') break; 114 else throw new XMLParseException("parse error: cannot read tag: [<.../"+(char)c+"]"); 115 }else if(state == 6) { 116 c = name_parser_.parse(c, parser); 117 endtag_name_ = name_parser_.getReturnValue(); 118 state = 7; 119 continue; 120 }else if(state == 7) { 121 if(c=='>') { 122 break; 123 } else if(space_parser_.match((char)c)) { 124 c = space_parser_.parse(c, parser); 125 continue; 126 } else throw new XMLParseException("parse error: cannot read tag: state=7 cannot find '>'"); 127 }else if(state == 9) { 128 c = name_parser_.parse(c, parser); 129 tag_element_ = new TagElement(name_parser_.getReturnValue()); 130 tag_element_.setPI(true); 131 state = 10; 132 continue; 133 }else if(state == 10) { 134 if(space_parser_.match((char)c)) { 135 c = space_parser_.parse(c, parser); 136 state = 11; 137 continue; 138 } else { 139 if(tag_element_.isPI()){ 140 if(c=='?') state = 4; 141 else return escape(parser,"in <? ... ?> tag"); 142 } else { 143 if(c=='>') break; 144 else if(c=='/') state = 4; 145 else if(is_novalue_occuered_) { 146 c = attribute_parser_.parse(c, parser); 147 Attribute attribute = attribute_parser_.getAttribute(); 148 is_novalue_occuered_ = attribute.isNovalue(); 149 tag_element_.addAttribute(attribute); 150 state = 10; 151 continue; 152 } else return escape(parser,"cannot read "+(char)c); 153 } 154 } 155 }else if(state == 11) { 156 if(attribute_parser_.match((char)c)) { 157 c = attribute_parser_.parse(c, parser); 158 Attribute attribute = attribute_parser_.getAttribute(); 159 is_novalue_occuered_ = attribute.isNovalue(); 160 tag_element_.addAttribute(attribute); 161 state = 10; 162 continue; 163 } else { 164 if(tag_element_.isPI()){ 165 if(c=='?') state = 4; 166 else throw new XMLParseException("parse error: cannot read tag: state=11"); 167 } else { 168 if(c=='>') break; 169 else if(c=='/') state = 4; 170 else throw new XMLParseException("parse error: cannot read tag: state=11"); 171 } 172 } 173 } 174 if(state==0) 175 c = parser.get(); 176 else 177 c = parser.getChar(); 178 } 179 180 } catch(IOException e) { 181 is_error_ = true; 182 StringWriter sw = new StringWriter(); 183 e.printStackTrace(new PrintWriter(sw)); 184 if(policy_.throwExceptionIfDocumentHasError()) 185 error_text_.append(sw.toString()); 186 parser.escape(e.getMessage()); 187 } catch(XMLParseException e) { 188 is_error_ = true; 189 StringWriter sw = new StringWriter(); 190 e.printStackTrace(new PrintWriter(sw)); 191 if(policy_.throwExceptionIfDocumentHasError()) 192 error_text_.append(sw.toString()); 193 parser.escape(e.getMessage()); 194 } 195 196 int result = -1; 197 try { 198 if(text_element_!=null) 199 result = c; 200 else 201 result = parser.get(); 202 } 203 catch (IOException e) { 204 throw new XMLParseException(e.toString()); 205 } 206 return result; 207 } 208 209 public int escape (ElementParser parser,String message) throws XMLParseException, IOException { 210 //try{throw new Exception("mark");}catch(Exception e){e.printStackTrace();} 211 is_error_ = true; 212 System.err.println("this documents has error: "+message); 213 System.err.println("skip---------------------"); 214 int c = parser.get(); 215 System.err.print((char)c); 216 while(c!='>' && c!=-1) System.err.print((char)(c=parser.get())); 217 // for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get())); 218 System.err.println("\n-------------------------"); 219 return parser.get(); 220 } 221 222 public boolean isTagElement() { 223 return (tag_element_!=null); 224 } 225 public boolean isTextElement() { 226 return (text_element_!=null); 227 } 228 public boolean isStartTag() { 229 return is_start_tag_; 230 } 231 public TextElement getTextElement() { 232 return text_element_; 233 } 234 public TagElement getTagElement() { 235 return tag_element_; 236 } 237 public String getEndTagName() { 238 return endtag_name_; 239 } 240 }