// ContextSerializerCsv.java

package org.thegalactic.context.io;

/*
 * ContextSerializerCsv.java
 *
 * Copyright: 2010-2015 Karell Bertet, France
 * Copyright: 2015-2016 The Galactic Organization, France
 *
 * License: http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html CeCILL-B license
 *
 * This file is part of java-lattices.
 * You can redistribute it and/or modify it under the terms of the CeCILL-B license.
 */
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.List;
import java.util.TreeSet;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;

import org.thegalactic.context.Context;
import org.thegalactic.io.Reader;
import org.thegalactic.io.Writer;

/**
 * This class defines the way for reading a context from a csv file and for
 * writing a context to a csv file.
 *
 * ![ContextSerializerCsv](ContextSerializerCsv.png)
 *
 * @uml ContextSerializerCsv.png
 * !include resources/org/thegalactic/context/io/ContextSerializerCsv.iuml
 * !include resources/org/thegalactic/io/Reader.iuml
 * !include resources/org/thegalactic/io/Writer.iuml
 *
 * hide members
 * show ContextSerializerCsv members
 * class ContextSerializerCsv #LightCyan
 * title ContextSerializerCsv UML graph
 */
public final class ContextSerializerCsv implements Reader<Context>, Writer<Context> {

    /**
     * The singleton instance.
     */
    private static final ContextSerializerCsv INSTANCE = new ContextSerializerCsv();

    /**
     * Return the singleton instance of this class.
     *
     * @return the singleton instance
     */
    public static ContextSerializerCsv getInstance() {
        return INSTANCE;
    }

    /**
     * Register this class for reading and writing .csv files.
     *
     * The singleton instance is registered both as a reader and as a writer
     * for the "csv" extension.
     */
    public static void register() {
        ContextIOFactory.getInstance().registerReader(ContextSerializerCsv.getInstance(), "csv");
        ContextIOFactory.getInstance().registerWriter(ContextSerializerCsv.getInstance(), "csv");
    }

    /**
     * This class is not designed to be publicly instantiated.
     */
    private ContextSerializerCsv() {
    }

    /**
     * Read a context from a csv file.
     *
     * The following format is respected:
     *
     * The first line contains the attribute names, the other lines contains the
     * observations identifier followed by boolean values
     *
     * ~~~
     * "",a,b,c,d,e
     * 1,1,0,1,0,0
     * 2,1,1,0,0,0
     * 3,0,1,0,1,1
     * 4,0,0,1,0,1
     * ~~~
     *
     * If the first attribute is the empty string, the first column corresponds
     * to the individual identifiers. In the other case, the individual
     * identifiers will be generated by successive integers.
     *
     * ~~~
     * a,b,c,d,e
     * 1,0,1,0,0
     * 1,1,0,0,0
     * 0,1,0,1,1
     * 0,0,1,0,1
     * ~~~
     *
     * Only the exact value {@code 1} marks a relation between an observation
     * and an attribute; any other cell content is treated as "no relation".
     *
     * The parser created on {@code file} is always closed before this method
     * returns, whether it succeeds or throws. The context may already have
     * been partially filled when an exception is thrown.
     *
     * @param context a context to read
     * @param file    a file
     *
     * @throws IOException When an IOException occurs, or when the content is
     *                     invalid: no record at all, no attribute, an empty or
     *                     duplicated attribute name, a record whose size differs
     *                     from the header size, or a duplicated identifier
     */
    public void read(Context context, BufferedReader file) throws IOException {
        // Parse the file
        CSVParser parser = CSVFormat.RFC4180.parse(file);

        // The parser must be released on every exit path, including the
        // validation failures below.
        try {
            // Get the records and record size
            List<CSVRecord> records = parser.getRecords();
            int length = records.size();

            // Verify length
            if (length == 0) {
                throw new IOException("CSV cannot be empty");
            }

            // Get the attributes and the attribute size
            CSVRecord attributes = records.get(0);
            int size = attributes.size();

            // An empty first header cell means the first column holds the identifiers
            boolean hasIdentifierColumn = "".equals(attributes.get(0));

            // Detect invalid attribute size
            if (size == 1 && hasIdentifierColumn) {
                throw new IOException("Attribute size cannot be 0");
            }

            // Index of the first attribute
            int first = hasIdentifierColumn ? 1 : 0;

            // Get the attributes
            for (int i = first; i < size; i++) {
                String attribute = attributes.get(i);

                // Detect empty attribute before touching the context, so that an
                // invalid name is never added to it
                if ("".equals(attribute)) {
                    throw new IOException("Empty attribute");
                }

                // Detect duplicated attribute
                if (!context.addToAttributes(attribute)) {
                    throw new IOException("Duplicated attribute");
                }
            }

            // Get the data (record 0 is the header)
            for (int j = 1; j < length; j++) {
                // Get the current record
                CSVRecord record = records.get(j);

                // Detect incorrect size
                if (record.size() != size) {
                    throw new IOException("Line does not have the correct number of attributes");
                }

                // Get the observation identifier: either the first column or
                // the 1-based index of the data record
                String identifier;
                if (hasIdentifierColumn) {
                    identifier = record.get(0);
                } else {
                    identifier = String.valueOf(j);
                }

                // Detect duplicated identifier
                if (!context.addToObservations(identifier)) {
                    throw new IOException("Duplicated identifier");
                }

                // Add the extent/intent for the current identifier and current attribute
                for (int i = first; i < size; i++) {
                    if ("1".equals(record.get(i))) {
                        context.addExtentIntent(identifier, attributes.get(i));
                    }
                }
            }
        } finally {
            // Close the parser
            parser.close();
        }

        context.setBitSets();
    }

    /**
     * Write a context to a csv file.
     *
     * The following format is respected:
     *
     * The first line contains the attribute names, the other lines contains the
     * observations identifier followed by boolean values
     *
     * ~~~
     * "",a,b,c,d,e
     * 1,1,0,1,0,0
     * 2,1,1,0,0,0
     * 3,0,1,0,1,1
     * 4,0,0,1,0,1
     * ~~~
     *
     * The first header cell is always the empty string, so the first column
     * always carries the observation identifiers.
     *
     * The printer created on {@code file} is always closed before this method
     * returns, whether it succeeds or throws.
     *
     * @param context a context to write
     * @param file    a file
     *
     * @throws IOException When an IOException occurs
     */
    public void write(Context context, BufferedWriter file) throws IOException {
        CSVPrinter printer = new CSVPrinter(file, CSVFormat.RFC4180);

        // The printer must be released on every exit path
        try {
            // Get the observations and the attributes
            TreeSet<Comparable> observations = context.getObservations();
            TreeSet<Comparable> attributes = context.getAttributes();

            // Prepare the attribute line: the empty first cell marks the identifier column
            printer.print("");

            for (Comparable attribute : attributes) {
                // Write each attribute
                printer.print(attribute);
            }

            printer.println();

            for (Comparable observation : observations) {
                // Write the observation
                printer.print(observation);

                // Write the extent/intents: 1 when the attribute belongs to the
                // intent of the observation, 0 otherwise
                for (Comparable attribute : attributes) {
                    if (context.getIntent(observation).contains(attribute)) {
                        printer.print(1);
                    } else {
                        printer.print(0);
                    }
                }

                printer.println();
            }
        } finally {
            printer.close();
        }
    }
}