ContextSerializerCsv.java
- package org.thegalactic.context.io;
- /*
- * ContextSerializerCsv.java
- *
- * Copyright: 2010-2015 Karell Bertet, France
- * Copyright: 2015-2016 The Galactic Organization, France
- *
- * License: http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html CeCILL-B license
- *
- * This file is part of java-lattices.
- * You can redistribute it and/or modify it under the terms of the CeCILL-B license.
- */
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.IOException;
- import java.util.List;
- import java.util.TreeSet;
- import org.apache.commons.csv.CSVFormat;
- import org.apache.commons.csv.CSVParser;
- import org.apache.commons.csv.CSVPrinter;
- import org.apache.commons.csv.CSVRecord;
- import org.thegalactic.context.Context;
- import org.thegalactic.io.Reader;
- import org.thegalactic.io.Writer;
- /**
- * This class defines the way for reading a context from a csv file.
- *
- * 
- *
- * @uml ContextSerializerCsv.png
- * !include resources/org/thegalactic/context/io/ContextSerializerCsv.iuml
- * !include resources/org/thegalactic/io/Reader.iuml
- * !include resources/org/thegalactic/io/Writer.iuml
- *
- * hide members
- * show ContextSerializerCsv members
- * class ContextSerializerCsv #LightCyan
- * title ContextSerializerCsv UML graph
- */
- public final class ContextSerializerCsv implements Reader<Context>, Writer<Context> {
- /**
- * The singleton instance.
- */
- private static final ContextSerializerCsv INSTANCE = new ContextSerializerCsv();
- /**
- * Return the singleton instance of this class.
- *
- * @return the singleton instance
- */
- public static ContextSerializerCsv getInstance() {
- return INSTANCE;
- }
- /**
- * Register this class for reading .csv files.
- */
- public static void register() {
- ContextIOFactory.getInstance().registerReader(ContextSerializerCsv.getInstance(), "csv");
- ContextIOFactory.getInstance().registerWriter(ContextSerializerCsv.getInstance(), "csv");
- }
- /**
- * This class is not designed to be publicly instantiated.
- */
- private ContextSerializerCsv() {
- }
- /**
- * Read a context from a csv file.
- *
- * The following format is respected:
- *
- * The first line contains the attribute names, the other lines contains the
- * observations identifier followed by boolean values
- *
- * ~~~
- * "",a,b,c,d,e
- * 1,1,0,1,0,0
- * 2,1,1,0,0,0
- * 3,0,1,0,1,1
- * 4,0,0,1,0,1
- * ~~~
- *
- * If the first attribute is the empty string, the first column corresponds
- * to the individual identifiers. In the other case, the individual
- * identifiers will be generated by successive integers.
- *
- * ~~~
- * a,b,c,d,e
- * 1,0,1,0,0
- * 1,1,0,0,0
- * 0,1,0,1,1
- * 0,0,1,0,1
- * ~~~
- *
- * @param context a context to read
- * @param file a file
- *
- * @throws IOException When an IOException occurs
- */
- public void read(Context context, BufferedReader file) throws IOException {
- // Parse the file
- CSVParser parser = CSVFormat.RFC4180.parse(file);
- // Get the records and record size
- List<CSVRecord> records = parser.getRecords();
- int length = records.size();
- // Verify length
- if (length == 0) {
- throw new IOException("CSV cannot be empty");
- }
- // Get the attributes and the attribute size
- CSVRecord attributes = records.get(0);
- int size = attributes.size();
- // Detect invalid attribute size
- if (size == 1 && attributes.get(0).equals("")) {
- throw new IOException("Attribute size cannot be 0");
- }
- // Index of the first attribute
- int first = 0;
- if (attributes.get(0).equals("")) {
- first = 1;
- }
- // Get the attributes
- for (int i = first; i < size; i++) {
- String attribute = attributes.get(i);
- // Detect duplicated attribute
- if (!context.addToAttributes(attribute)) {
- throw new IOException("Duplicated attribute");
- }
- // Detect empty attribute
- if ("".equals(attribute)) {
- throw new IOException("Empty attribute");
- }
- }
- // Get the data
- for (int j = 1; j < length; j++) {
- // Get the current record
- CSVRecord record = records.get(j);
- // Detect incorrect size
- if (record.size() != size) {
- throw new IOException("Line does not have the correct number of attributes");
- }
- // Get the observation identifier
- String identifier;
- if (first == 1) {
- identifier = record.get(0);
- } else {
- identifier = String.valueOf(j);
- }
- // Detect duplicated identifier
- if (!context.addToObservations(identifier)) {
- throw new IOException("Duplicated identifier");
- }
- // Add the extent/intent for the current identifier and current attribute
- for (int i = first; i < size; i++) {
- if (record.get(i).equals("1")) {
- context.addExtentIntent(identifier, attributes.get(i));
- }
- }
- }
- // Close the parser
- parser.close();
- context.setBitSets();
- }
- /**
- * Write a context to a csv file.
- *
- * The following format is respected:
- *
- * The first line contains the attribute names, the other lines contains the
- * observations identifier followed by boolean values
- *
- * ~~~
- * "",a,b,c,d,e
- * 1,1,0,1,0,0
- * 2,1,1,0,0,0
- * 3,0,1,0,1,1
- * 4,0,0,1,0,1
- * ~~~
- *
- * @param context a context to write
- * @param file a file
- *
- * @throws IOException When an IOException occurs
- */
- public void write(Context context, BufferedWriter file) throws IOException {
- CSVPrinter printer = new CSVPrinter(file, CSVFormat.RFC4180);
- // Get the observations and the attributes
- TreeSet<Comparable> observations = context.getObservations();
- TreeSet<Comparable> attributes = context.getAttributes();
- // Prepare the attribute line
- printer.print("");
- for (Comparable attribute : attributes) {
- // Write each attribute
- printer.print(attribute);
- }
- printer.println();
- for (Comparable observation : observations) {
- // Write the observation
- printer.print(observation);
- // Write the extent/intents
- for (Comparable attribute : attributes) {
- if (context.getIntent(observation).contains(attribute)) {
- printer.print(1);
- } else {
- printer.print(0);
- }
- }
- printer.println();
- }
- printer.close();
- }
- }