diff --git a/src/cluster/cluster.cpp b/src/cluster/cluster.cpp
index bcc959da32dd4748ab05d4724409593799e4de29..8429523f47bd069333baf5be16045f37e07b6c16 100644
--- a/src/cluster/cluster.cpp
+++ b/src/cluster/cluster.cpp
@@ -258,34 +258,39 @@ void cluster(const string& config_file, const string& data_file, const string& o
 #pragma omp barrier
 	{
 	  // thread 0 already wrote on global files, skip it and take care of appending the others
-	  for (int ri = 1; ri < ompnumthreads; ri++) {
+	  for (int ri = 0; ri < ompnumthreads; ri++) {
+	    // thread 0's partial ASCII file (c_OCLU_<rank>_0) is not appended, but it must still be deleted
 	    string partial_file_name = output_path + "/c_OCLU_" + to_string(mpidata->rank) + "_" + to_string(ri);
-	    string message = "Copying ASCII output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
-	    logger->log(message, LOG_DEBG);
-	    FILE *partial_output = fopen(partial_file_name.c_str(), "r");
-	    int c = fgetc(partial_output);
-	    while (c != EOF) {
-	      fputc(c, output);
-	      c = fgetc(partial_output);
+	    if (ri == 0) {
+	      remove(partial_file_name.c_str());
+	    } else {
+	      string message = "Copying ASCII output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
+	      logger->log(message, LOG_DEBG);
+	      FILE *partial_output = fopen(partial_file_name.c_str(), "r");
+	      int c = fgetc(partial_output);
+	      while (c != EOF) {
+		fputc(c, output);
+		c = fgetc(partial_output);
+	      }
+	      fclose(partial_output);
+	      remove(partial_file_name.c_str());
+	      logger->log("done.\n", LOG_DEBG);
+	      partial_file_name = output_path + "/c_TPPOAN_" + to_string(mpidata->rank) + "_" + to_string(ri);
+	      message = "Copying binary output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
+	      logger->log(message, LOG_DEBG);
+	      fstream partial_tppoan;
+	      partial_tppoan.open(partial_file_name.c_str(), ios::in | ios::binary);
+	      partial_tppoan.seekg(0, ios::end);
+	      long buffer_size = partial_tppoan.tellg();
+	      char *binary_buffer = new char[buffer_size];
+	      partial_tppoan.seekg(0, ios::beg);
+	      partial_tppoan.read(binary_buffer, buffer_size);
+	      tppoan.write(binary_buffer, buffer_size);
+	      partial_tppoan.close();
+	      delete[] binary_buffer;
+	      remove(partial_file_name.c_str());
+	      logger->log("done.\n", LOG_DEBG);
 	    }
-	    fclose(partial_output);
-	    remove(partial_file_name.c_str());
-	    logger->log("done.\n", LOG_DEBG);
-	    partial_file_name = output_path + "/c_TPPOAN_" + to_string(mpidata->rank) + "_" + to_string(ri);
-	    message = "Copying binary output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";
-	    logger->log(message, LOG_DEBG);
-	    fstream partial_tppoan;
-	    partial_tppoan.open(partial_file_name.c_str(), ios::in | ios::binary);
-	    partial_tppoan.seekg(0, ios::end);
-	    long buffer_size = partial_tppoan.tellg();
-	    char *binary_buffer = new char[buffer_size];
-	    partial_tppoan.seekg(0, ios::beg);
-	    partial_tppoan.read(binary_buffer, buffer_size);
-	    tppoan.write(binary_buffer, buffer_size);
-	    partial_tppoan.close();
-	    delete[] binary_buffer;
-	    remove(partial_file_name.c_str());
-	    logger->log("done.\n", LOG_DEBG);
 	  }
 	}
 #endif
@@ -453,6 +458,7 @@ void cluster(const string& config_file, const string& data_file, const string& o
 	MPI_Send(&chunk_buffer_size, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
 	partial_output.close();
 	delete[] chunk_buffer;
+	remove(partial_file_name.c_str());
 
 	partial_file_name = output_path + "/c_TPPOAN_" + to_string(mpidata->rank) + "_" + to_string(ri);
 	message = "Copying binary output in MPI process " + to_string(mpidata->rank) + " of thread " + to_string(ri) + " of " + to_string(ompnumthreads - 1) + "... ";