001 /* 002 * Copyright (c) 2005 Marko Karppinen & Co. LLC 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package fi.karppinen.xml; 024 025 import org.xml.sax.ContentHandler; 026 import org.xml.sax.SAXException; 027 028 /** 029 * Parses plain text with double line break paragraph breaks as XHTML 1.x paragraphs. 030 * 031 * @version $Id: SimpleParagraphParser.java,v 1.1 2006/10/30 19:57:09 hsivonen Exp $ 032 * @author hsivonen 033 */ 034 public class SimpleParagraphParser { 035 036 private char[] content; 037 038 private XhtmlEmitter emitter; 039 040 private ContentHandler contentHandler; 041 042 private String htmlClass = null; 043 044 private char[] SPACE = { ' ' }; 045 046 private boolean somethingRendered = false; 047 048 /** 049 * 050 */ 051 public SimpleParagraphParser(String content, ContentHandler contentHandler) { 052 this.content = content.toCharArray(); 053 this.emitter = new XhtmlEmitter(contentHandler); 054 this.contentHandler = contentHandler; 055 } 056 057 public void parse() throws SAXException { 058 int i = 0; 059 int len = content.length; 060 int start = 0; 061 boolean paraOpen = false; 062 boolean prevWasSpace = false; 063 while (i < len) { 064 if (!paraOpen) { 065 whitespace: while (i < len) { 066 switch (content[i]) { 067 case ' ': 068 case '\n': 069 case '\r': 070 case '\t': 071 i++; 072 break; 073 default: 074 break whitespace; 075 } 076 } 077 } 078 if (i < len) { 079 if (!paraOpen) { 080 if (htmlClass == null) { 081 emitter.startElement("p"); 082 } else { 083 emitter.startElementWithClass("p", htmlClass); 084 } 085 somethingRendered = true; 086 paraOpen = true; 087 start = i; 088 } 089 innerloop: while (i < len) { 090 switch (content[i]) { 091 case '\r': 092 if (i + 1 < len && content[i + 1] == '\n') { 093 if (start < i) { 094 contentHandler.characters(content, start, i 095 - start); 096 } 097 i++; 098 start = i; 099 break; 100 } 101 // fall thru! 102 case '\n': 103 if (prevWasSpace) { 104 if (start < i) { 105 contentHandler.characters(content, start, i 106 - start); 107 } 108 i++; 109 start = i; 110 prevWasSpace = false; 111 break innerloop; 112 } else { 113 if (start < i) { 114 contentHandler.characters(content, start, i 115 - start); 116 } 117 i++; 118 start = i; 119 emitter.characters(SPACE); 120 prevWasSpace = true; 121 } 122 break; 123 default: 124 prevWasSpace = false; 125 i++; 126 } 127 } 128 } 129 if (paraOpen) { 130 emitter.endElement("p"); 131 paraOpen = false; 132 } 133 } 134 } 135 /** 136 * Returns the somethingRendered. 137 * 138 * @return the somethingRendered 139 */ 140 public boolean isSomethingRendered() { 141 return somethingRendered; 142 } 143 /** 144 * Returns the htmlClass. 145 * 146 * @return the htmlClass 147 */ 148 public String getHtmlClass() { 149 return htmlClass; 150 } 151 /** 152 * Sets the htmlClass. 153 * 154 * @param htmlClass the htmlClass to set 155 */ 156 public void setHtmlClass(String htmlClass) { 157 this.htmlClass = htmlClass; 158 } 159 }