Dziękuję wam bardzo za odpowiedź. Jutro założę sobie konto. Mam zadanie, które pobiera zniekształcony tekst (za dużo białych znaków i wierszy), który ma być doprowadzony do odpowiedniej formy. Następnie tekst ma być podzielony na zdania, a wyrazy mają być posortowane i zapisane do XML – tu używam StAX, w czym jestem kompletnie zielony. Mam wszystko w kawałkach i nie do końca dobrze. Proszę o nakierowanie mnie, co mam poprawić i jak to wszystko połączyć, a w szczególności jak tekst wrzucić do XML.
XML ma mieć formę:
<tekst>
<zdanie>
<slowo>a</slowo>
<slowo>b</slowo>
.........
</zdanie>
<zdanie>
<slowo>zzzz</slowo>
.....
</zdanie>
<zdanie>
<slowo></slowo>
......
</zdanie>
</tekst>
Edycja tekstu:
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
/**
 * Reads {@code text.txt}, cleans up its whitespace and punctuation spacing, and prints the
 * cleaned text followed by the words of every sentence in case-insensitive alphabetical order.
 *
 * <p>Output layout: the cleaned text on one line, a separator line, then one word per line,
 * sentence after sentence.
 */
public class JCA {

    /** Input file, resolved against the current working directory. */
    private static final String INPUT_FILE = "text.txt";

    /** Line printed between the cleaned text and the sorted words. */
    private static final String SEPARATOR =
            "===================================================";

    /**
     * Program entry point.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String allTogether;
        try {
            allTogether = normalize(readAll(INPUT_FILE));
        } catch (IOException e) {
            // Covers a missing file as well (FileNotFoundException is an IOException);
            // there is nothing to process, so report the problem and stop.
            System.err.println("Cannot read " + INPUT_FILE + ": " + e.getMessage());
            return;
        }

        System.out.println(allTogether);
        System.out.println(SEPARATOR);

        for (String[] sentence : sortedSentences(allTogether)) {
            for (String word : sentence) {
                System.out.println(word);
            }
        }
    }

    /**
     * Reads the whole file into one string, joining lines with a single space so that the
     * last word of a line and the first word of the next line stay separate.
     */
    private static String readAll(String fileName) throws IOException {
        StringBuilder raw = new StringBuilder();
        // try-with-resources closes the reader even when reading fails.
        // FileReader decodes with the platform default charset.
        try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = in.readLine()) != null) {
                raw.append(line).append(' ');
            }
        }
        return raw.toString();
    }

    /**
     * Collapses whitespace runs to one space, removes the space in front of commas and
     * periods, collapses runs of periods to a single period, and trims the result.
     */
    private static String normalize(String raw) {
        return raw.replaceAll("\\s+", " ")
                .replaceAll(" ,", ",")
                .replaceAll(" \\.", ".")
                // Only repeated periods are collapsed; the character after a period is kept.
                .replaceAll("\\.{2,}", ".")
                .trim();
    }

    /**
     * Splits the text into sentences at periods and returns, per sentence, its words sorted
     * case-insensitively. A sentence without words yields an empty array.
     */
    private static String[][] sortedSentences(String text) {
        String[] sentences = text.split("[.]");
        String[][] result = new String[sentences.length][];
        for (int i = 0; i < sentences.length; i++) {
            // Trim first: the space following a period would otherwise produce an empty word.
            String sentence = sentences[i].trim();
            String[] words = sentence.isEmpty() ? new String[0] : sentence.split("\\s+");
            Arrays.sort(words, String.CASE_INSENSITIVE_ORDER);
            result[i] = words;
        }
        return result;
    }
}
Klasa Driver
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLStreamException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
/**
 * Prints the XML produced by {@code XMLEvent#writeXML()} after running it through an
 * indenting identity transformation.
 */
public class Driver {

    /**
     * Builds the XML, re-serialises it with indentation and writes it to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws TransformerException, XMLStreamException, FactoryConfigurationError {
        String rawXml = new XMLEvent().writeXML();
        String indentedXml = transformXML(4, rawXml);
        System.out.println(indentedXml);
    }

    /**
     * Re-serialises an XML string with indentation enabled.
     *
     * @param identation value passed to the factory as its {@code "indent-number"} attribute
     * @param rawXML     XML text to re-serialise
     * @return the text written by the transformer
     * @throws TransformerException if the transformer cannot be created or the transformation fails
     */
    public static String transformXML(int identation, String rawXML)
            throws TransformerException {
        TransformerFactory factory = TransformerFactory.newInstance();
        // NOTE(review): "indent-number" is a factory attribute rather than a standard output
        // property; support presumably depends on the TransformerFactory in use - confirm.
        factory.setAttribute("indent-number", identation);

        // A transformer created without a stylesheet copies its source to its result.
        Transformer identity = factory.newTransformer();
        identity.setOutputProperty(OutputKeys.INDENT, "yes");

        StringWriter buffer = new StringWriter();
        StreamSource source = new StreamSource(new StringReader(rawXML));
        identity.transform(source, new StreamResult(buffer));
        return buffer.toString();
    }
}
Klasa XMLEvent
import java.io.ByteArrayOutputStream;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
/**
 * Serialises sentences (arrays of words) to XML with the StAX event API.
 *
 * <p>Produced structure: one root element containing one sentence element per row, each
 * holding one word element per entry.
 *
 * <p>NOTE(review): the target format described alongside this code names the elements
 * tekst/zdanie/slowo; confirm which vocabulary is required and adjust the constants below.
 */
public class XMLEvent {

    /** Encoding used both for writing the bytes and for decoding them back into a String. */
    private static final String ENCODING = "UTF-8";

    private static final String ROOT_ELEMENT = "text";
    private static final String SENTENCE_ELEMENT = "sentence";
    private static final String WORD_ELEMENT = "word";

    /** Sample data written by {@link #writeXML()}: one row per sentence. */
    String[][] word ={{"111aaa","111bbb","111ccc"},{"222aaa","222bbb"},{"333aaa","333bbb","333ccc"}};

    /**
     * Writes the sample data held in {@link #word}.
     *
     * @return the XML document as a string
     * @throws XMLStreamException if an event cannot be written
     */
    public String writeXML() throws XMLStreamException, FactoryConfigurationError{
        return writeXML(word);
    }

    /**
     * Writes the given sentences as an XML document.
     *
     * @param sentences one row per sentence, each row holding that sentence's words
     * @return the XML document as a string
     * @throws XMLStreamException if an event cannot be written
     */
    public String writeXML(String[][] sentences) throws XMLStreamException, FactoryConfigurationError{
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        XMLEventWriter xmlWrite = XMLOutputFactory.newInstance().createXMLEventWriter(outStream, ENCODING);
        XMLEventFactory xmlFactory = XMLEventFactory.newInstance();

        xmlWrite.add(xmlFactory.createStartDocument(ENCODING));
        xmlWrite.add(xmlFactory.createStartElement("", "", ROOT_ELEMENT));
        // Every row is written, however many sentences there are.
        for (String[] sentenceWords : sentences) {
            newTopic(xmlFactory, xmlWrite, "", sentenceWords);
        }
        xmlWrite.add(xmlFactory.createEndElement("", "", ROOT_ELEMENT));
        xmlWrite.add(xmlFactory.createEndDocument());
        xmlWrite.flush();
        xmlWrite.close();

        // Decode with the same charset the writer was created with, so the result does not
        // depend on the platform default charset.
        return new String(outStream.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Writes one sentence element containing one word element per entry of {@code word}.
     *
     * @param xmlFactory factory used to create the events
     * @param xmlWriter  writer the events are added to
     * @param sentence   not used by this method; kept so existing callers keep compiling
     * @param word       words of the sentence, written in the given order
     * @throws XMLStreamException if an event cannot be written
     */
    public void newTopic( XMLEventFactory xmlFactory, XMLEventWriter xmlWriter, String sentence, String[] word) throws XMLStreamException{
        xmlWriter.add(xmlFactory.createStartElement("", "", SENTENCE_ELEMENT));
        for (String w : word) {
            xmlWriter.add(xmlFactory.createStartElement("", "", WORD_ELEMENT));
            xmlWriter.add(xmlFactory.createCharacters(w));
            xmlWriter.add(xmlFactory.createEndElement("", "", WORD_ELEMENT));
        }
        // The end element must carry the same name as the start element opened above.
        xmlWriter.add(xmlFactory.createEndElement("", "", SENTENCE_ELEMENT));
    }
}