Commit 267105b8 authored by Fabien Triolet
Browse files

Added a new generator for csv files, vcsReader extends AbstractXMLReader now

parent 1cf9e85a
/*
* $Id: AbstractXMLReader.java,v 1.1 2002-10-18 11:58:34 triolet Exp $
*
* Transmorpher
*
* Copyright (C) 2001-2002 Fluxmedia and INRIA Rhône-Alpes.
*
* http://www.fluxmedia.fr - http://transmorpher.inrialpes.fr
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package fr.fluxmedia.tmcontrib.generator;
// imported java classes
import java.io.IOException;
import java.util.*;
// imported SAX2 classes
import org.xml.sax.*;
/**
 * Skeleton implementation of the SAX2 {@link XMLReader} interface.
 *
 * It stores features, properties and the four SAX handlers so that a
 * sub-class only has to implement {@link #parse(InputSource)} in order to
 * behave like an XMLReader.
 *
 * This class is defined in the following book "Java and XSLT" by Eric M. Burke (O'Reilly)
 *
 *@author triolet
 */
public abstract class AbstractXMLReader implements XMLReader {

    /** Feature name (String) mapped to its state (Boolean). */
    private final Map features = new HashMap();

    /** Property name (String) mapped to its value (Object). */
    private final Map properties = new HashMap();

    private EntityResolver resolver;
    private DTDHandler dtd;
    private ContentHandler content;
    private ErrorHandler errors;

    /**
     * Produces the SAX2 events for the given input. Every sub-class has to
     * provide this method.
     *
     *@param input The InputSource to parse.
     *@exception IOException if the input has not been found
     *@exception SAXException an exception which can wrap other exceptions
     */
    public abstract void parse(InputSource input) throws IOException, SAXException;

    /**
     * Parses the resource designated by a system identifier by wrapping it in
     * an InputSource and delegating to {@link #parse(InputSource)}.
     *
     *@param systemId The systemID of the file to parse
     *@exception IOException if the input has not been found
     *@exception SAXException an exception which can wrap other exceptions
     */
    public void parse(String systemId) throws IOException, SAXException {
        InputSource source = new InputSource(systemId);
        parse(source);
    }

    /**
     * Returns the stored state of a feature.
     *
     *@param name The feature name
     *@return The value last given to setFeature for this name, or false when
     *      the feature has never been set
     *@exception SAXNotRecognizedException declared by XMLReader; not thrown here
     *@exception SAXNotSupportedException declared by XMLReader; not thrown here
     */
    public boolean getFeature(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
        // A missing entry (null) compares unequal to TRUE and therefore reads as false.
        return Boolean.TRUE.equals(features.get(name));
    }

    /**
     * Records the state of a feature. Any name is accepted.
     *
     *@param name The feature name
     *@param value The new feature value
     *@exception SAXNotRecognizedException declared by XMLReader; not thrown here
     *@exception SAXNotSupportedException declared by XMLReader; not thrown here
     */
    public void setFeature(String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
        features.put(name, value ? Boolean.TRUE : Boolean.FALSE);
    }

    /**
     * Returns the stored value of a property.
     *
     *@param name The property name
     *@return The value last given to setProperty for this name, or null when
     *      the property has never been set
     *@exception SAXNotRecognizedException declared by XMLReader; not thrown here
     *@exception SAXNotSupportedException declared by XMLReader; not thrown here
     */
    public Object getProperty(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
        return properties.get(name);
    }

    /**
     * Records the value of a property. Any name is accepted.
     *
     *@param name The property name
     *@param value The new property value
     *@exception SAXNotRecognizedException declared by XMLReader; not thrown here
     *@exception SAXNotSupportedException declared by XMLReader; not thrown here
     */
    public void setProperty(String name, Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
        properties.put(name, value);
    }

    /**
     * Gets the entity resolver.
     *
     *@return The entity resolver, or null if none has been registered
     */
    public EntityResolver getEntityResolver() {
        return resolver;
    }

    /**
     * Registers the entity resolver.
     *
     *@param entityResolver The new entity resolver
     */
    public void setEntityResolver(EntityResolver entityResolver) {
        resolver = entityResolver;
    }

    /**
     * Gets the DTD handler.
     *
     *@return The DTD handler, or null if none has been registered
     */
    public DTDHandler getDTDHandler() {
        return dtd;
    }

    /**
     * Registers the DTD handler.
     *
     *@param dtdHandler The new DTD handler
     */
    public void setDTDHandler(DTDHandler dtdHandler) {
        dtd = dtdHandler;
    }

    /**
     * Gets the content handler that receives the SAX2 events.
     *
     *@return The content handler, or null if none has been registered
     */
    public ContentHandler getContentHandler() {
        return content;
    }

    /**
     * Registers the content handler that receives the SAX2 events.
     *
     *@param contentHandler The new content handler
     */
    public void setContentHandler(ContentHandler contentHandler) {
        content = contentHandler;
    }

    /**
     * Gets the error handler.
     *
     *@return The error handler, or null if none has been registered
     */
    public ErrorHandler getErrorHandler() {
        return errors;
    }

    /**
     * Registers the error handler.
     *
     *@param errorHandler The new error handler
     */
    public void setErrorHandler(ErrorHandler errorHandler) {
        errors = errorHandler;
    }
}
/*
* $Id: CSVXMLReader.java,v 1.1 2002-10-18 11:58:34 triolet Exp $
*
* Transmorpher
*
* Copyright (C) 2001-2002 Fluxmedia and INRIA Rhône-Alpes.
*
* http://www.fluxmedia.fr - http://transmorpher.inrialpes.fr
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package fr.fluxmedia.tmcontrib.generator;
//imported java classes
import java.io.*;
import java.net.URL;
//imported SAX2 classes
import org.xml.sax.*;
import org.xml.sax.helpers.*;
/**
 * A class for parsing CSV (comma separated value) files. File data are
 * converted into SAX2 events of the form:
 * <pre>
 * &lt;csvfile&gt;
 *   &lt;line&gt;
 *     &lt;value&gt;a&lt;/value&gt;
 *     &lt;value&gt;b&lt;/value&gt;
 *   &lt;/line&gt;
 * &lt;/csvfile&gt;
 * </pre>
 * A simple CSV file can look like:
 * <pre>
 * a,b,c
 * d,e,f
 * </pre>
 * Quotes can be used when values contain the separator, and a doubled quote
 * inside a value stands for one literal quote:
 * <pre>
 * a,"b,c",d
 * e,"f,g","h,i"
 * </pre>
 * The separator can be set with <code>setSeparator</code>.
 *
 * Known limitation: lines and values are trimmed with
 * <code>String.trim()</code>, which strips every character up to U+0020.
 * With a whitespace separator such as a tab, empty fields can therefore be
 * lost.
 *
 * This class is defined in the following book "Java and XSLT" by Eric M. Burke (O'Reilly)
 *
 *@author triolet
 */
public class CSVXMLReader extends AbstractXMLReader {

    private final static Attributes EMPTY_ATTR = new AttributesImpl();

    /** Name of the document element; shared by the start and the end event. */
    private final static String ROOT_ELEMENT = "csvfile";
    private final static String LINE_ELEMENT = "line";
    private final static String VALUE_ELEMENT = "value";

    private char separator = ',';

    /**
     * Creates a reader using the first character of the given string as the
     * value separator.
     *
     *@param separator The separator; when null or empty the default ',' is kept
     */
    public CSVXMLReader(String separator) {
        if (separator != null && separator.length() > 0) {
            this.separator = separator.charAt(0);
        }
    }

    /**
     * Analyses a CSV file. SAX2 events are sent to the ContentHandler; when
     * no ContentHandler is registered the method returns without reading.
     * Blank lines are skipped.
     *
     * The underlying stream is closed at the end of the parse, whether it
     * was supplied by the caller or opened here from the system identifier.
     *
     *@param input The input to parse; its character stream, byte stream and
     *      system identifier are tried in that order
     *@exception IOException if input has not been found or cannot be read
     *@exception SAXException if the InputSource carries no stream and no
     *      system identifier, or if the ContentHandler reports an error
     */
    public void parse(InputSource input) throws IOException, SAXException {
        ContentHandler ch = getContentHandler();
        if (ch == null) {
            return;
        }
        BufferedReader br = openReader(input);
        try {
            ch.startDocument();
            //<csvfile>
            ch.startElement("", ROOT_ELEMENT, ROOT_ELEMENT, EMPTY_ATTR);
            String curLine;
            while ((curLine = br.readLine()) != null) {
                curLine = curLine.trim();
                if (curLine.length() > 0) {
                    //<line>
                    ch.startElement("", LINE_ELEMENT, LINE_ELEMENT, EMPTY_ATTR);
                    parseLine(curLine, ch);
                    //</line>
                    ch.endElement("", LINE_ELEMENT, LINE_ELEMENT);
                }
            }
            //</csvfile>
            ch.endElement("", ROOT_ELEMENT, ROOT_ELEMENT);
            ch.endDocument();
        } finally {
            try {
                br.close();
            } catch (IOException ignored) {
                // A failure while closing must not hide an exception raised by the parse itself.
            }
        }
    }

    /**
     * Sets the separator attribute of the CSVXMLReader object
     *
     *@param separator The new separator value; only its first character is
     *      used, and a null or empty string leaves the separator unchanged
     */
    public void setSeparator(String separator) {
        if (separator != null && separator.length() > 0) {
            this.separator = separator.charAt(0);
        }
    }

    /**
     * Builds the line reader for an InputSource.
     *
     *@param input The input to read
     *@return A buffered reader over the character stream, the byte stream or
     *      the URL named by the system identifier, whichever is found first
     *@exception IOException if the system identifier cannot be opened
     *@exception SAXException if the InputSource provides none of the three
     */
    private BufferedReader openReader(InputSource input) throws IOException, SAXException {
        if (input.getCharacterStream() != null) {
            return new BufferedReader(input.getCharacterStream());
        }
        if (input.getByteStream() != null) {
            return new BufferedReader(new InputStreamReader(input.getByteStream()));
        }
        if (input.getSystemId() != null) {
            URL url = new URL(input.getSystemId());
            return new BufferedReader(new InputStreamReader(url.openStream()));
        }
        throw new SAXException("bad InputSource Object");
    }

    /**
     * Splits a line into values and emits one value element per value.
     * Implemented as a loop so that the number of values on a line is not
     * limited by the call stack.
     *
     *@param curLine The current line
     *@param ch The contentHandler which receives SAX2 events
     *@exception SAXException an exception that can wrap other exceptions
     */
    private void parseLine(String curLine, ContentHandler ch) throws SAXException {
        String remainder = curLine;
        while (remainder != null) {
            String token;
            int delimiterIndex = locateFirstDelimiter(remainder);
            if (delimiterIndex > -1) {
                token = remainder.substring(0, delimiterIndex).trim();
                // A trailing separator leaves an empty remainder, which yields one last empty value.
                remainder = remainder.substring(delimiterIndex + 1).trim();
            } else {
                token = remainder;
                remainder = null;
            }
            token = cleanupQuotes(token);
            //<value>
            ch.startElement("", VALUE_ELEMENT, VALUE_ELEMENT, EMPTY_ATTR);
            ch.characters(token.toCharArray(), 0, token.length());
            //</value>
            ch.endElement("", VALUE_ELEMENT, VALUE_ELEMENT);
        }
    }

    /**
     * Locates the first separator character in the current line. When the
     * line starts with a quote, separators found inside quotes are ignored.
     *
     *@param curLine The current line
     *@return The index of the first separator character in the line, -1 if not found
     */
    private int locateFirstDelimiter(String curLine) {
        if (!curLine.startsWith("\"")) {
            return curLine.indexOf(separator);
        }
        // The opening quote at index 0 puts us inside a quoted section.
        boolean inQuote = true;
        int numChars = curLine.length();
        for (int i = 1; i < numChars; i++) {
            char curChar = curLine.charAt(i);
            if (curChar == '"') {
                inQuote = !inQuote;
            } else if (curChar == separator && !inQuote) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Removes the quotes around a token and replaces every doubled quote by
     * a single one.
     *
     *@param token The token with quotes to remove
     *@return The token without quotes
     */
    private String cleanupQuotes(String token) {
        int start = 0;
        int end = token.length();
        if (token.startsWith("\"") && token.endsWith("\"")) {
            start = 1;
            end--;
        }
        StringBuffer buf = new StringBuffer();
        boolean previousWasQuote = false;
        for (int i = start; i < end; i++) {
            char curChar = token.charAt(i);
            if (curChar == '"' && previousWasQuote) {
                // Second quote of a pair: the first one has already been appended.
                previousWasQuote = false;
                continue;
            }
            previousWasQuote = (curChar == '"');
            buf.append(curChar);
        }
        return buf.toString();
    }
}
/*
* $Id: ReadCSV.java,v 1.1 2002-10-18 11:58:34 triolet Exp $
*
* Transmorpher
*
* Copyright (C) 2001-2002 Fluxmedia and INRIA Rhône-Alpes.
*
* http://www.fluxmedia.fr - http://transmorpher.inrialpes.fr
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package fr.fluxmedia.tmcontrib.generator;
// Imported TRANSMORPHER Classes
import fr.fluxmedia.transmorpher.TProcessFactory.TReader;
import fr.fluxmedia.transmorpher.TProcessFactory.XML_Port;
import fr.fluxmedia.transmorpher.Utils.Parameters;
import fr.fluxmedia.transmorpher.Utils.StringParameters;
//Imported JAVA classes
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
// Imported SAX Classes
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * Transmorpher generator that reads a CSV file, or a CSV resource reachable
 * through HTTP(S), and parses it with a {@link CSVXMLReader}.
 *
 * NOTE(review): iReader, isThread, myThread, isRunning and getParameter are
 * not declared in this class; presumably they are inherited from TReader.
 *
 *@author triolet
 */
public class ReadCSV extends TReader {

    /**
     * name of the file to parse
     */
    String iFilename = null;

    /**
     * Constructor for the ReadCSV object. The "separator" parameter, when
     * present, selects the value separator of the CSV reader.
     *
     *@param pOut The out port of this generator
     *@param pParam The parameters of this generator
     *@param pStaticAttributes The static attributes of this generator
     *@exception SAXException an exception that can wrap other exceptions
     *@exception IOException if an I/O operation failed
     */
    public ReadCSV(String[] pOut, Parameters pParam, StringParameters pStaticAttributes) throws SAXException, IOException {
        super(pOut, pParam, pStaticAttributes);
        iReader = new CSVXMLReader(((String) pParam.getParameter("separator")));
    }

    /**
     * Starts to parse the document named by the "file" parameter, either in
     * the generator's own thread or in the calling thread.
     *
     *@exception IOException if an I/O operation failed
     *@exception SAXException an exception that can wrap other exceptions
     */
    public void execProcess() throws IOException, SAXException {
        iFilename = (String) getParameter("file");
        if (isThread) {
            myThread.start();
        } else {
            read();
        }
    }

    /**
     * Creates an input source for the parser with the name of the file and
     * starts the parsing. Names starting with "http://" or "https://" are
     * opened as URLs, anything else as a local file. Nothing happens when no
     * file name is set. The stream opened here is always closed before the
     * method returns.
     *
     *@exception IOException if an I/O operation failed
     *@exception SAXException an exception that can wrap other exceptions
     */
    public void read() throws IOException, SAXException {
        if (iFilename == null) {
            // Reading from standard input is not implemented.
            return;
        }
        String systemId;
        InputStream inStream;
        if (iFilename.startsWith("http://") || iFilename.startsWith("https://")) {
            systemId = iFilename;
            URLConnection urlConnect = new URL(iFilename).openConnection();
            inStream = urlConnect.getInputStream();
        } else {
            File file = new File(iFilename);
            // Computed before the stream is opened so that a failure here cannot leak it.
            systemId = file.toURI().toURL().toExternalForm();
            inStream = new FileInputStream(file);
        }
        try {
            InputSource inSource = new InputSource(inStream);
            inSource.setSystemId(systemId);
            iReader.parse(inSource);
        } finally {
            try {
                inStream.close();
            } catch (IOException ignored) {
                // A failure while closing must not hide an exception raised by the parse itself.
            }
        }
    }

    /**
     * Begins the parsing of a file.
     * This method is called inside a repeat. At each loop, a new file is
     * parsed. A SAXException raised by one file is reported on the error
     * stream and not propagated.
     *
     *@param file name of the file to parse
     *@exception IOException if an I/O operation failed
     *@exception SAXException declared for callers; SAX errors raised while
     *      reading are caught and logged by this method
     */
    public void execProcess(String file) throws IOException, SAXException {
        iFilename = file;
        try {
            if (iFilename != null) {
                read();
            }
        } catch (SAXException e) {
            System.err.println("[ReadCSV] " + e + " " + file);
            e.printStackTrace();
        }
    }

    /**
     * Starts the parsing of a file using Thread. The parse is attempted only
     * while isRunning is false; the flag is set once the attempt is over,
     * whether it succeeded or not.
     */
    public final void run() {
        if (!isRunning) {
            try {
                read();
            } catch (Exception e) {
                System.err.println("Fin du parse 2 !erreur dans le thread");
                e.printStackTrace();
            }
            isRunning = true;
        }
    }
}
......@@ -50,7 +50,7 @@ import org.xml.sax.helpers.AttributesImpl;
*
*@author Bruno Charre
*/
class vcsReader implements XMLReader {
class vcsReader extends AbstractXMLReader {
private final static String NAMESPACE = "http://co4.inrialpes.fr/xml/pimlib/vcs";
......@@ -67,11 +67,6 @@ class vcsReader implements XMLReader {
*/
protected Vector attributesENCODING = new Vector();
ContentHandler contentHandler = null;
DTDHandler DTDHandler = null;
EntityResolver entityResolver = null;
ErrorHandler errorHandler = null;
/**
* The type of the file
*/
......@@ -102,133 +97,6 @@ class vcsReader implements XMLReader {
}
}
/**
* Reports the state of a feature. This implementation does not look at the
* requested name: it answers false for every feature.
*
*@param name The name of the feature to query; not used by this implementation
*@return Always false
*@exception SAXNotRecognizedException declared in the signature; not thrown by this body
*@exception SAXNotSupportedException declared in the signature; not thrown by this body
*/
public boolean getFeature(String name)
throws SAXNotRecognizedException,
SAXNotSupportedException {
return false;
}