ContextSerializerCsv.java

  1. package org.thegalactic.context.io;

  2. /*
  3.  * ContextSerializerCsv.java
  4.  *
  5.  * Copyright: 2010-2015 Karell Bertet, France
  6.  * Copyright: 2015-2016 The Galactic Organization, France
  7.  *
  8.  * License: http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html CeCILL-B license
  9.  *
  10.  * This file is part of java-lattices.
  11.  * You can redistribute it and/or modify it under the terms of the CeCILL-B license.
  12.  */
  13. import java.io.BufferedReader;
  14. import java.io.BufferedWriter;
  15. import java.io.IOException;
  16. import java.util.List;
  17. import java.util.TreeSet;

  18. import org.apache.commons.csv.CSVFormat;
  19. import org.apache.commons.csv.CSVParser;
  20. import org.apache.commons.csv.CSVPrinter;
  21. import org.apache.commons.csv.CSVRecord;

  22. import org.thegalactic.context.Context;
  23. import org.thegalactic.io.Reader;
  24. import org.thegalactic.io.Writer;

  25. /**
  26.  * This class defines the way for reading a context from a csv file.
  27.  *
  28.  * ![ContextSerializerCsv](ContextSerializerCsv.png)
  29.  *
  30.  * @uml ContextSerializerCsv.png
  31.  * !include resources/org/thegalactic/context/io/ContextSerializerCsv.iuml
  32.  * !include resources/org/thegalactic/io/Reader.iuml
  33.  * !include resources/org/thegalactic/io/Writer.iuml
  34.  *
  35.  * hide members
  36.  * show ContextSerializerCsv members
  37.  * class ContextSerializerCsv #LightCyan
  38.  * title ContextSerializerCsv UML graph
  39.  */
  40. public final class ContextSerializerCsv implements Reader<Context>, Writer<Context> {

  41.     /**
  42.      * The singleton instance.
  43.      */
  44.     private static final ContextSerializerCsv INSTANCE = new ContextSerializerCsv();

  45.     /**
  46.      * Return the singleton instance of this class.
  47.      *
  48.      * @return the singleton instance
  49.      */
  50.     public static ContextSerializerCsv getInstance() {
  51.         return INSTANCE;
  52.     }

  53.     /**
  54.      * Register this class for reading .csv files.
  55.      */
  56.     public static void register() {
  57.         ContextIOFactory.getInstance().registerReader(ContextSerializerCsv.getInstance(), "csv");
  58.         ContextIOFactory.getInstance().registerWriter(ContextSerializerCsv.getInstance(), "csv");
  59.     }

  60.     /**
  61.      * This class is not designed to be publicly instantiated.
  62.      */
  63.     private ContextSerializerCsv() {
  64.     }

  65.     /**
  66.      * Read a context from a csv file.
  67.      *
  68.      * The following format is respected:
  69.      *
  70.      * The first line contains the attribute names, the other lines contains the
  71.      * observations identifier followed by boolean values
  72.      *
  73.      * ~~~
  74.      * "",a,b,c,d,e
  75.      * 1,1,0,1,0,0
  76.      * 2,1,1,0,0,0
  77.      * 3,0,1,0,1,1
  78.      * 4,0,0,1,0,1
  79.      * ~~~
  80.      *
  81.      * If the first attribute is the empty string, the first column corresponds
  82.      * to the individual identifiers. In the other case, the individual
  83.      * identifiers will be generated by successive integers.
  84.      *
  85.      * ~~~
  86.      * a,b,c,d,e
  87.      * 1,0,1,0,0
  88.      * 1,1,0,0,0
  89.      * 0,1,0,1,1
  90.      * 0,0,1,0,1
  91.      * ~~~
  92.      *
  93.      * @param context a context to read
  94.      * @param file    a file
  95.      *
  96.      * @throws IOException When an IOException occurs
  97.      */
  98.     public void read(Context context, BufferedReader file) throws IOException {
  99.         // Parse the file
  100.         CSVParser parser = CSVFormat.RFC4180.parse(file);

  101.         // Get the records and record size
  102.         List<CSVRecord> records = parser.getRecords();
  103.         int length = records.size();

  104.         // Verify length
  105.         if (length == 0) {
  106.             throw new IOException("CSV cannot be empty");
  107.         }

  108.         // Get the attributes and the attribute size
  109.         CSVRecord attributes = records.get(0);
  110.         int size = attributes.size();

  111.         // Detect invalid attribute size
  112.         if (size == 1 && attributes.get(0).equals("")) {
  113.             throw new IOException("Attribute size cannot be 0");
  114.         }

  115.         // Index of the first attribute
  116.         int first = 0;
  117.         if (attributes.get(0).equals("")) {
  118.             first = 1;
  119.         }

  120.         // Get the attributes
  121.         for (int i = first; i < size; i++) {
  122.             String attribute = attributes.get(i);

  123.             // Detect duplicated attribute
  124.             if (!context.addToAttributes(attribute)) {
  125.                 throw new IOException("Duplicated attribute");
  126.             }

  127.             // Detect empty attribute
  128.             if ("".equals(attribute)) {
  129.                 throw new IOException("Empty attribute");
  130.             }
  131.         }

  132.         // Get the data
  133.         for (int j = 1; j < length; j++) {
  134.             // Get the current record
  135.             CSVRecord record = records.get(j);

  136.             // Detect incorrect size
  137.             if (record.size() != size) {
  138.                 throw new IOException("Line does not have the correct number of attributes");
  139.             }

  140.             // Get the observation identifier
  141.             String identifier;
  142.             if (first == 1) {
  143.                 identifier = record.get(0);
  144.             } else {
  145.                 identifier = String.valueOf(j);
  146.             }

  147.             // Detect duplicated identifier
  148.             if (!context.addToObservations(identifier)) {
  149.                 throw new IOException("Duplicated identifier");
  150.             }

  151.             // Add the extent/intent for the current identifier and current attribute
  152.             for (int i = first; i < size; i++) {
  153.                 if (record.get(i).equals("1")) {
  154.                     context.addExtentIntent(identifier, attributes.get(i));
  155.                 }
  156.             }
  157.         }

  158.         // Close the parser
  159.         parser.close();
  160.         context.setBitSets();
  161.     }

  162.     /**
  163.      * Write a context to a csv file.
  164.      *
  165.      * The following format is respected:
  166.      *
  167.      * The first line contains the attribute names, the other lines contains the
  168.      * observations identifier followed by boolean values
  169.      *
  170.      * ~~~
  171.      * "",a,b,c,d,e
  172.      * 1,1,0,1,0,0
  173.      * 2,1,1,0,0,0
  174.      * 3,0,1,0,1,1
  175.      * 4,0,0,1,0,1
  176.      * ~~~
  177.      *
  178.      * @param context a context to write
  179.      * @param file    a file
  180.      *
  181.      * @throws IOException When an IOException occurs
  182.      */
  183.     public void write(Context context, BufferedWriter file) throws IOException {
  184.         CSVPrinter printer = new CSVPrinter(file, CSVFormat.RFC4180);

  185.         // Get the observations and the attributes
  186.         TreeSet<Comparable> observations = context.getObservations();
  187.         TreeSet<Comparable> attributes = context.getAttributes();

  188.         // Prepare the attribute line
  189.         printer.print("");

  190.         for (Comparable attribute : attributes) {
  191.             // Write each attribute
  192.             printer.print(attribute);
  193.         }

  194.         printer.println();

  195.         for (Comparable observation : observations) {
  196.             // Write the observation
  197.             printer.print(observation);

  198.             // Write the extent/intents
  199.             for (Comparable attribute : attributes) {
  200.                 if (context.getIntent(observation).contains(attribute)) {
  201.                     printer.print(1);
  202.                 } else {
  203.                     printer.print(0);
  204.                 }
  205.             }

  206.             printer.println();
  207.         }

  208.         printer.close();
  209.     }
  210. }