From 80502b4a456347de654554a44a3b54c8118dbfdb Mon Sep 17 00:00:00 2001
From: Giovanni La Mura <giovanni.lamura@inaf.it>
Date: Wed, 10 Jan 2024 17:33:41 +0100
Subject: [PATCH] Define HDF5 I/O wrapper library

---
 src/include/file_io.h   |  78 +++++++++++++++++++--
 src/libnptm/file_io.cpp | 147 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 216 insertions(+), 9 deletions(-)

diff --git a/src/include/file_io.h b/src/include/file_io.h
index 180dd676..9801804a 100644
--- a/src/include/file_io.h
+++ b/src/include/file_io.h
@@ -19,6 +19,8 @@ class FileSchema {
  protected:
   //! \brief Number of records conained in the file.
   int num_records;
+  //! \brief Array of record names.
+  std::string *record_names;
   //! \brief Array of record descriptors.
   std::string *record_types;
 
@@ -28,11 +30,28 @@ class FileSchema {
    * \param num_rec: `int` Number of records in the file.
    * \param rec_types: `string *` Description of the records in the file.
    */
-  FileSchema(int num_rec, std::string *rec_types);
+  FileSchema(int num_rec, std::string *rec_types, std::string *rec_names=NULL);
 
   /*! \brief FileSchema instance destroyer.
    */
   ~FileSchema();
+
+  /*! \brief Get the number of records in file.
+   *
+   * \return num_records: `int` The number of records contained in the file.
+   */
+  int get_record_number() { return num_records; }
+
+  /*! \brief Get a copy of the record names.
+   *
+   * \return rec_names: `string *` A new vector of strings with record names.
+   */
+  std::string *get_record_names();
+  /*! \brief Get a copy of the record types.
+   *
+   * \return rec_types: `string *` A new vector of strings with description of records.
+   */
+  std::string *get_record_types();
 };
 
 /*! \class HDFFile
@@ -43,28 +62,56 @@ class FileSchema {
  */
 class HDFFile {
  protected:
+  //! \brief Identifier list.
+  List<hid_t> *id_list;
+  //! \brief Name of the file.
   std::string file_name;
+  //! \brief Flag for the open file status.
   bool file_open_flag;
+  //! File identifier handle.
   hid_t file_id;
+  //! Return status of the last operation.
   herr_t status;
 
  public:
   /*! \brief HDFFile instance constructor.
    *
    * \param name: `string` Name of the file.
-   * \param flags: `unsigned int` File access flags.
-   * \param fcpl_id: `hid_t` File creation property list identifier.
-   * \param fapl_id: `hid_t` File access property list identifier.
+   * \param flags: `unsigned int` File access flags (default is `H5F_ACC_EXCL`).
+   * \param fcpl_id: `hid_t` File creation property list identifier (default is `H5P_DEFAULT`).
+   * \param fapl_id: `hid_t` File access property list identifier (default is `H5P_DEFAULT`).
    */
   HDFFile(
 	  std::string name, unsigned int flags = H5F_ACC_EXCL,
 	  hid_t fcpl_id = H5P_DEFAULT, hid_t fapl_id = H5P_DEFAULT
-);
+  );
 
+  /*! \brief HDFFile instance destroyer.
+   */
+  ~HDFFile();
+  
   /*! \brief Close the current file.
    */
   herr_t close();
 
+  /*! \brief Create an empty file from a `FileSchema` instance.
+   *
+   * \param schema: `FileSchema &` Reference to `FileSchema` instance.
+   * \param name: `string` Name of the file.
+   * \param flags: `unsigned int` File access flags (default is `H5F_ACC_EXCL`).
+   * \param fcpl_id: `hid_t` File creation property list identifier (default is `H5P_DEFAULT`).
+   * \param fapl_id: `hid_t` File access property list identifier (default is `H5P_DEFAULT`).
+   * \return hdf_file: `HDFFile *` Pointer to a new, open HDF5 file.
+   */
+  static HDFFile* from_schema(
+			      FileSchema &schema, std::string name, unsigned int flags = H5F_ACC_EXCL,
+			      hid_t fcpl_id = H5P_DEFAULT, hid_t fapl_id = H5P_DEFAULT
+  );
+
+  /*! \brief Get the HDF5 identifier of the attached file.
+   */
+  hid_t get_file_id() { return file_id; }
+  
   /*! \brief Get current status.
    */
   herr_t get_status() { return status; }
@@ -72,5 +119,26 @@ class HDFFile {
   /*! \brief Check whether the attached file is currently open.
    */
   bool is_open() { return file_open_flag; }
+  
+  /* ! \brief Read data from attached file.
+   */
+  // herr_t read();
+  
+  /*! \brief Write data to attached file.
+   *
+   * \param dataset_name: `string` Name of the dataset to write to.
+   * \param data_type: `string` Memory data type tag (must contain `INT32` or `FLOAT64`).
+   * \param buffer: `const void *` Starting address of the memory sector to be written.
+   * \param mem_space_id: `hid_t` Memory data space identifier (defaults to `H5S_ALL`).
+   * \param file_space_id: `hid_t` File space identifier (defaults to `H5S_ALL`).
+   * \param dapl_id: `hid_t` Data access property list identifier (defaults to `H5P_DEFAULT`).
+   * \param dxpl_id: `hid_t` Data transfer property list identifier (defaults to `H5P_DEFAULT`).
+   * \return status: `herr_t` Exit status of the operation.
+   */
+  herr_t write(
+	       std::string dataset_name, std::string data_type, const void *buffer,
+	       hid_t mem_space_id=H5S_ALL, hid_t file_space_id=H5S_ALL,
+	       hid_t dapl_id=H5P_DEFAULT, hid_t dxpl_id=H5P_DEFAULT
+  );
 };
 #endif
diff --git a/src/libnptm/file_io.cpp b/src/libnptm/file_io.cpp
index f73d8ff9..3f46bd6a 100644
--- a/src/libnptm/file_io.cpp
+++ b/src/libnptm/file_io.cpp
@@ -2,32 +2,171 @@
  *
  * \brief Implementation of file I/O operations.
  */
+#include <stdexcept>
+#include <regex>
 #include <string>
 #include "hdf5.h"
 
+#ifndef INCLUDE_LIST_H_
+#include "List.h"
+#endif
+
 #ifndef INCLUDE_FILE_IO_H_
-#include "../include/file_io.h"
+#include "file_io.h"
 #endif
 
 using namespace std;
 
-FileSchema::FileSchema(int num_rec, string *rec_types) {
+FileSchema::FileSchema(int num_rec, string *rec_types, string *rec_names) {
   num_records = num_rec;
   record_types = new string[num_rec];
-  for (int i = 0; i < num_rec; i++) record_types[i] = rec_types[i];
+  record_names = new string[num_rec];  // owned copy, released in the destructor
+  for (int i = 0; i < num_rec; i++) {
+    record_types[i] = rec_types[i];
+    if (rec_names != NULL) record_names[i] = rec_names[i];
+    else record_names[i] = "/dset" + to_string(i);  // default name when the caller supplied none
+  }
+}
+
+FileSchema::~FileSchema() {  // frees the two arrays allocated by the constructor
+  delete[] record_names;
+  delete[] record_types;
 }
 
-FileSchema::~FileSchema() { delete[] record_types; }
+string* FileSchema::get_record_names() {  // the caller owns the result and must delete[] it
+  string *names_copy = new string[num_records];
+  for (int idx = 0; idx != num_records; ++idx) names_copy[idx] = record_names[idx];
+  return names_copy;
+}
+
+string* FileSchema::get_record_types() {  // the caller owns the result and must delete[] it
+  string *types_copy = new string[num_records];
+  for (int idx = 0; idx != num_records; ++idx) types_copy[idx] = record_types[idx];
+  return types_copy;
+}
 
 HDFFile::HDFFile(string name, unsigned int flags, hid_t fcpl_id, hid_t fapl_id) {
   file_name = name;
   file_id = H5Fcreate(name.c_str(), flags, fcpl_id, fapl_id);
+  id_list = new List<hid_t>(1);  // identifier list; presumably sized for one element - confirm against List.h
+  id_list->set(0, file_id);  // NOTE(review): stored even if H5Fcreate failed; file_open_flag is then never initialized here
   if (file_id != H5I_INVALID_HID) file_open_flag = true;
   status = (herr_t)0;
 }
 
+HDFFile::~HDFFile() {  // releases the file handle and the identifier list
+  if (H5Iis_valid(file_id) > 0) status = H5Fclose(file_id);  // close only while HDF5 still reports the identifier as valid
+  delete id_list;
+}
+
 herr_t HDFFile::close() {
   status = H5Fclose(file_id);
   if (status == 0) file_open_flag = false;
   return status;
 }
+
+HDFFile* HDFFile::from_schema(
+			      FileSchema &schema, string name, unsigned int flags,
+			      hid_t fcpl_id, hid_t fapl_id
+) {
+  // Create the file, then add one fixed-size dataset per schema record. A record
+  // descriptor is a type keyword ("INT32" or "FLOAT64") followed by the dataset
+  // dimensions, each a group of decimal digits. Throws runtime_error when the
+  // keyword is missing; a dataset that HDF5 cannot create is skipped and the
+  // status of the returned object is set to -1.
+  HDFFile *hdf_file = new HDFFile(name, flags, fcpl_id, fapl_id);
+  hid_t file_id = hdf_file->get_file_id();
+  string *rec_types = NULL;
+  string *rec_names = NULL;
+  try {
+    rec_types = schema.get_record_types();
+    rec_names = schema.get_record_names();
+    int rec_num = schema.get_record_number();
+    regex re("[0-9]+");
+    smatch m;
+    for (int ri = 0; ri < rec_num; ri++) {
+      // Predefined native types are used directly: no type handle to close.
+      hid_t data_type;
+      size_t dims_start = rec_types[ri].find("INT32");
+      if (dims_start != string::npos) {
+        data_type = H5T_NATIVE_INT;
+        dims_start += 5; // skip "INT32"
+      } else if ((dims_start = rec_types[ri].find("FLOAT64")) != string::npos) {
+        data_type = H5T_NATIVE_DOUBLE;
+        dims_start += 7; // skip "FLOAT64"
+      } else {
+        throw runtime_error("unrecognized type \"" + rec_types[ri] + "\"\n");
+      }
+      // Only the text after the keyword is scanned, so that the digits of
+      // the keyword itself are not taken for a dimension.
+      string str_target = rec_types[ri].substr(dims_start);
+      hsize_t dims[H5S_MAX_RANK];
+      int rank = 0;
+      while (rank < H5S_MAX_RANK && regex_search(str_target, m, re)) {
+        dims[rank++] = (hsize_t)stoull(m.str());
+        str_target = m.suffix().str();
+      }
+      // More digit groups than HDF5 supports dimensions cannot be honoured.
+      bool too_many_dims = (rank == H5S_MAX_RANK) && regex_search(str_target, m, re);
+      // A null maximum makes the maximum size equal to the current size.
+      hid_t dataspace_id = H5I_INVALID_HID;
+      if (!too_many_dims) dataspace_id = H5Screate_simple(rank, dims, NULL);
+      hid_t dataset_id = H5I_INVALID_HID;
+      if (dataspace_id >= 0) {
+        dataset_id = H5Dcreate(file_id, rec_names[ri].c_str(), data_type, dataspace_id,
+                               H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        H5Sclose(dataspace_id);
+      }
+      if (dataset_id >= 0) H5Dclose(dataset_id);
+      else hdf_file->status = (herr_t)-1;
+    }
+  } catch (...) {
+    // Release everything acquired so far, then let the error propagate.
+    delete[] rec_types;
+    delete[] rec_names;
+    delete hdf_file;
+    throw;
+  }
+  delete[] rec_types;
+  delete[] rec_names;
+  return hdf_file;
+}
+
+herr_t HDFFile::write(
+		      string dataset_name, string data_type, const void *buffer,
+		      hid_t mem_space_id, hid_t file_space_id, hid_t dapl_id,
+		      hid_t dxpl_id
+) {
+  // Write `buffer` to an existing dataset of the attached file. Returns the
+  // status that is also stored in `status`: -1 if the dataset cannot be opened,
+  // -2 if the transfer fails, otherwise the result of closing the dataset.
+  // Throws runtime_error when `data_type` names no supported type.
+  //
+  // The type tag is searched as a substring, with "INT32" taking precedence
+  // over "FLOAT64" when both occur.
+  hid_t mem_type_id;
+  if (data_type.find("INT32") != string::npos) {
+    mem_type_id = H5T_NATIVE_INT;
+  } else if (data_type.find("FLOAT64") != string::npos) {
+    mem_type_id = H5T_NATIVE_DOUBLE;
+  } else {
+    // Unknown tags are rejected before the file is touched.
+    throw runtime_error("Unrecognized data type \"" + data_type + "\"");
+  }
+  // Open the target dataset; it must already exist in the file.
+  hid_t dataset_id = H5Dopen2(file_id, dataset_name.c_str(), dapl_id);
+  if (dataset_id < 0) {
+    status = (herr_t)-1;
+    return status;
+  }
+  // HDF5 reports success with any non-negative value, not only with zero.
+  herr_t write_status = H5Dwrite(
+    dataset_id, mem_type_id, mem_space_id, file_space_id, dxpl_id, buffer
+  );
+  // The dataset handle is released on every path, so a failed transfer does
+  // not leave it open.
+  herr_t close_status = H5Dclose(dataset_id);
+  if (write_status < 0) status = (herr_t)-2;
+  else status = close_status;
+  return status;
+}
-- 
GitLab