ContextSerializerCsv.java
package org.thegalactic.context.io;
/*
* ContextSerializerCsv.java
*
* Copyright: 2010-2015 Karell Bertet, France
* Copyright: 2015-2016 The Galactic Organization, France
*
* License: http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html CeCILL-B license
*
* This file is part of java-lattices.
* You can redistribute it and/or modify it under the terms of the CeCILL-B license.
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.List;
import java.util.TreeSet;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.thegalactic.context.Context;
import org.thegalactic.io.Reader;
import org.thegalactic.io.Writer;
/**
 * This class defines the way for reading a context from a csv file and for
 * writing a context to a csv file.
 *
 * ![ContextSerializerCsv](ContextSerializerCsv.png)
 *
 * @uml ContextSerializerCsv.png
 * !include resources/org/thegalactic/context/io/ContextSerializerCsv.iuml
 * !include resources/org/thegalactic/io/Reader.iuml
 * !include resources/org/thegalactic/io/Writer.iuml
 *
 * hide members
 * show ContextSerializerCsv members
 * class ContextSerializerCsv #LightCyan
 * title ContextSerializerCsv UML graph
 */
public final class ContextSerializerCsv implements Reader<Context>, Writer<Context> {

    /**
     * The singleton instance.
     */
    private static final ContextSerializerCsv INSTANCE = new ContextSerializerCsv();

    /**
     * Return the singleton instance of this class.
     *
     * @return the singleton instance
     */
    public static ContextSerializerCsv getInstance() {
        return INSTANCE;
    }

    /**
     * Register this class for reading and writing .csv files.
     */
    public static void register() {
        ContextIOFactory.getInstance().registerReader(ContextSerializerCsv.getInstance(), "csv");
        ContextIOFactory.getInstance().registerWriter(ContextSerializerCsv.getInstance(), "csv");
    }

    /**
     * This class is not designed to be publicly instantiated.
     */
    private ContextSerializerCsv() {
    }

    /**
     * Read a context from a csv file.
     *
     * The following format is respected:
     *
     * The first line contains the attribute names, the other lines contain the
     * observation identifier followed by boolean values
     *
     * ~~~
     * "",a,b,c,d,e
     * 1,1,0,1,0,0
     * 2,1,1,0,0,0
     * 3,0,1,0,1,1
     * 4,0,0,1,0,1
     * ~~~
     *
     * If the first attribute is the empty string, the first column corresponds
     * to the individual identifiers. In the other case, the individual
     * identifiers will be generated by successive integers.
     *
     * ~~~
     * a,b,c,d,e
     * 1,0,1,0,0
     * 1,1,0,0,0
     * 0,1,0,1,1
     * 0,0,1,0,1
     * ~~~
     *
     * A cell is considered true only when it is exactly the string "1"; any
     * other value is treated as false.
     *
     * The CSV parser is always closed before this method returns, whether the
     * content is valid or not.
     *
     * @param context a context to read
     * @param file    a file
     *
     * @throws IOException When an IOException occurs, or when the content is
     *                     invalid: empty file, no attribute, empty or
     *                     duplicated attribute, line with a wrong number of
     *                     values, duplicated identifier
     */
    public void read(Context context, BufferedReader file) throws IOException {
        // Parse the file
        CSVParser parser = CSVFormat.RFC4180.parse(file);
        try {
            // Get the records and the number of lines
            List<CSVRecord> records = parser.getRecords();
            int length = records.size();

            // Verify length
            if (length == 0) {
                throw new IOException("CSV cannot be empty");
            }

            // Get the header line and its number of columns
            CSVRecord attributes = records.get(0);
            int size = attributes.size();

            // An empty first header cell means the first column holds the identifiers
            boolean hasIdentifiers = "".equals(attributes.get(0));

            // Detect invalid attribute size
            if (size == 1 && hasIdentifiers) {
                throw new IOException("Attribute size cannot be 0");
            }

            // Index of the first attribute
            int first = hasIdentifiers ? 1 : 0;

            // Get the attributes
            for (int i = first; i < size; i++) {
                String attribute = attributes.get(i);

                // Detect empty attribute before touching the context,
                // so that an invalid name is never added to it
                if ("".equals(attribute)) {
                    throw new IOException("Empty attribute");
                }

                // Detect duplicated attribute
                if (!context.addToAttributes(attribute)) {
                    throw new IOException("Duplicated attribute");
                }
            }

            // Get the data
            for (int j = 1; j < length; j++) {
                // Get the current record
                CSVRecord record = records.get(j);

                // Detect incorrect size
                if (record.size() != size) {
                    throw new IOException("Line does not have the correct number of attributes");
                }

                // Get the observation identifier: either the first column
                // or the (1-based) index of the data line
                String identifier = hasIdentifiers ? record.get(0) : String.valueOf(j);

                // Detect duplicated identifier
                if (!context.addToObservations(identifier)) {
                    throw new IOException("Duplicated identifier");
                }

                // Add the extent/intent for the current identifier and current attribute
                for (int i = first; i < size; i++) {
                    if ("1".equals(record.get(i))) {
                        context.addExtentIntent(identifier, attributes.get(i));
                    }
                }
            }
        } finally {
            // Close the parser even when the content is rejected
            parser.close();
        }

        context.setBitSets();
    }

    /**
     * Write a context to a csv file.
     *
     * The following format is respected:
     *
     * The first line contains the attribute names, the other lines contain the
     * observation identifier followed by boolean values
     *
     * ~~~
     * "",a,b,c,d,e
     * 1,1,0,1,0,0
     * 2,1,1,0,0,0
     * 3,0,1,0,1,1
     * 4,0,0,1,0,1
     * ~~~
     *
     * The CSV printer is always closed before this method returns, even when
     * writing fails.
     *
     * @param context a context to write
     * @param file    a file
     *
     * @throws IOException When an IOException occurs
     */
    public void write(Context context, BufferedWriter file) throws IOException {
        CSVPrinter printer = new CSVPrinter(file, CSVFormat.RFC4180);
        try {
            // Get the observations and the attributes
            TreeSet<Comparable> observations = context.getObservations();
            TreeSet<Comparable> attributes = context.getAttributes();

            // Prepare the attribute line: the empty first cell marks the identifier column
            printer.print("");
            for (Comparable attribute : attributes) {
                // Write each attribute
                printer.print(attribute);
            }
            printer.println();

            for (Comparable observation : observations) {
                // Write the observation
                printer.print(observation);

                // Write the extent/intents
                for (Comparable attribute : attributes) {
                    if (context.getIntent(observation).contains(attribute)) {
                        printer.print(1);
                    } else {
                        printer.print(0);
                    }
                }
                printer.println();
            }
        } finally {
            // Close the printer even when writing fails
            printer.close();
        }
    }
}