001 /*
002 * Copyright (c) 2005 Marko Karppinen & Co. LLC
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package fi.karppinen.xml;
024
025 import org.xml.sax.ContentHandler;
026 import org.xml.sax.SAXException;
027
028 /**
029 * Parses plain text with double line break paragraph breaks as XHTML 1.x paragraphs.
030 *
031 * @version $Id: SimpleParagraphParser.java,v 1.1 2006/10/30 19:57:09 hsivonen Exp $
032 * @author hsivonen
033 */
034 public class SimpleParagraphParser {
035
036 private char[] content;
037
038 private XhtmlEmitter emitter;
039
040 private ContentHandler contentHandler;
041
042 private String htmlClass = null;
043
044 private char[] SPACE = { ' ' };
045
046 private boolean somethingRendered = false;
047
048 /**
049 *
050 */
051 public SimpleParagraphParser(String content, ContentHandler contentHandler) {
052 this.content = content.toCharArray();
053 this.emitter = new XhtmlEmitter(contentHandler);
054 this.contentHandler = contentHandler;
055 }
056
057 public void parse() throws SAXException {
058 int i = 0;
059 int len = content.length;
060 int start = 0;
061 boolean paraOpen = false;
062 boolean prevWasSpace = false;
063 while (i < len) {
064 if (!paraOpen) {
065 whitespace: while (i < len) {
066 switch (content[i]) {
067 case ' ':
068 case '\n':
069 case '\r':
070 case '\t':
071 i++;
072 break;
073 default:
074 break whitespace;
075 }
076 }
077 }
078 if (i < len) {
079 if (!paraOpen) {
080 if (htmlClass == null) {
081 emitter.startElement("p");
082 } else {
083 emitter.startElementWithClass("p", htmlClass);
084 }
085 somethingRendered = true;
086 paraOpen = true;
087 start = i;
088 }
089 innerloop: while (i < len) {
090 switch (content[i]) {
091 case '\r':
092 if (i + 1 < len && content[i + 1] == '\n') {
093 if (start < i) {
094 contentHandler.characters(content, start, i
095 - start);
096 }
097 i++;
098 start = i;
099 break;
100 }
101 // fall thru!
102 case '\n':
103 if (prevWasSpace) {
104 if (start < i) {
105 contentHandler.characters(content, start, i
106 - start);
107 }
108 i++;
109 start = i;
110 prevWasSpace = false;
111 break innerloop;
112 } else {
113 if (start < i) {
114 contentHandler.characters(content, start, i
115 - start);
116 }
117 i++;
118 start = i;
119 emitter.characters(SPACE);
120 prevWasSpace = true;
121 }
122 break;
123 default:
124 prevWasSpace = false;
125 i++;
126 }
127 }
128 }
129 if (paraOpen) {
130 emitter.endElement("p");
131 paraOpen = false;
132 }
133 }
134 }
135 /**
136 * Returns the somethingRendered.
137 *
138 * @return the somethingRendered
139 */
140 public boolean isSomethingRendered() {
141 return somethingRendered;
142 }
143 /**
144 * Returns the htmlClass.
145 *
146 * @return the htmlClass
147 */
148 public String getHtmlClass() {
149 return htmlClass;
150 }
151 /**
152 * Sets the htmlClass.
153 *
154 * @param htmlClass the htmlClass to set
155 */
156 public void setHtmlClass(String htmlClass) {
157 this.htmlClass = htmlClass;
158 }
159 }