From e41009904e2346c71f3fe07fd2b671ec663b472f Mon Sep 17 00:00:00 2001
From: Stefano Alberto Russo <stefano.russo@gmail.com>
Date: Mon, 29 Nov 2021 01:33:05 +0100
Subject: [PATCH] Added the standalone computing resource service with Podman,
 Docker and Singularity support. Improved the demo Slurm cluster naming.

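The standaloneworker service is a single-node computing resource: the webapp
reaches it over SSH as testuser (using the bundled demo key) and runs tasks
there via Podman or Singularity, while Docker is reachable through the mounted
/var/run/docker.sock. As an indicative smoke test (exact key path and test
image are placeholders and may differ in your setup), the service can be
brought up and checked with something like:

    docker-compose -f docker-compose-dev.yml up -d standaloneworker
    # from a container on the same compose network, e.g. the webapp one:
    ssh -o StrictHostKeyChecking=no -i /path/to/demo/id_rsa testuser@standaloneworker \
        'podman run --rm busybox echo "standalone worker OK"'
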
---
 docker-compose-dev.yml                        | 21 +++++++----
 rosetta/build                                 |  1 +
 services/slurmbase/slurm.conf                 | 19 +++++-----
 services/standaloneworker/Dockerfile          | 36 +++++++++++++++++++
 services/standaloneworker/entrypoint.sh       | 23 ++++++++++++
 services/standaloneworker/keys/id_rsa.pub     |  1 +
 services/standaloneworker/subgid              |  1 +
 services/standaloneworker/subuid              |  1 +
 .../rosetta/core_app/computing_managers.py    |  6 ++--
 .../management/commands/core_app_populate.py  |  8 ++---
 10 files changed, 94 insertions(+), 23 deletions(-)
 create mode 100755 services/standaloneworker/Dockerfile
 create mode 100644 services/standaloneworker/entrypoint.sh
 create mode 100644 services/standaloneworker/keys/id_rsa.pub
 create mode 100644 services/standaloneworker/subgid
 create mode 100644 services/standaloneworker/subuid

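A quick way to sanity-check the renamed demo Slurm nodes once the dev stack is
up (indicative, run from the host):

    docker exec slurmclustermaster sinfo -N
    # expected: slurmclusterworker listed under partition1
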
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 16bc7b9..2e83dff 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -1,10 +1,10 @@
 version: '3'
 services:
 
-  slurmclustermaster-main:
+  slurmclustermaster:
     image: "rosetta/slurmclustermaster"
-    container_name: slurmclustermaster-main
-    hostname: slurmclustermaster-main
+    container_name: slurmclustermaster
+    hostname: slurmclustermaster
     environment:
       - SAFEMODE=False
     privileged: true
@@ -12,10 +12,10 @@ services:
       - ./data/shared:/shared
       # - ./data/singularity_cache:/rosetta/.singularity/cache # Not working, check permissions...
 
-  slurmclusterworker-one:
+  slurmclusterworker:
     image: "rosetta/slurmclusterworker"
-    container_name: slurmclusterworker-one
-    hostname: slurmclusterworker-one
+    container_name: slurmclusterworker
+    hostname: slurmclusterworker
     environment:
       - SAFEMODE=False
     privileged: true
@@ -23,6 +23,15 @@ services:
       - ./data/shared:/shared
       - /var/run/docker.sock:/var/run/docker.sock
 
+  standaloneworker:
+    image: "rosetta/standaloneworker"
+    container_name: standaloneworker
+    hostname: standaloneworker
+    privileged: true
+    volumes:
+      - ./data/shared:/shared
+      - /var/run/docker.sock:/var/run/docker.sock
+
   dregistry:
     container_name: dregistry
     hostname: dregistry
diff --git a/rosetta/build b/rosetta/build
index 10dae55..4d4b0e1 100755
--- a/rosetta/build
+++ b/rosetta/build
@@ -34,6 +34,7 @@ if [[ "x$SERVICE" == "x" ]] ; then
     $BUILD_COMMAND services/slurmcluster -t rosetta/slurmcluster    
     $BUILD_COMMAND services/slurmclustermaster -t rosetta/slurmclustermaster    
     $BUILD_COMMAND services/slurmclusterworker -t rosetta/slurmclusterworker    
+    $BUILD_COMMAND services/standaloneworker -t rosetta/standaloneworker    
     $BUILD_COMMAND services/dregistry -t rosetta/dregistry
     $BUILD_COMMAND services/webapp -t rosetta/webapp
     $BUILD_COMMAND services/postgres -t rosetta/postgres
diff --git a/services/slurmbase/slurm.conf b/services/slurmbase/slurm.conf
index 74bb787..a2c46d1 100755
--- a/services/slurmbase/slurm.conf
+++ b/services/slurmbase/slurm.conf
@@ -2,7 +2,7 @@
 # Put this file on all nodes of your cluster.
 # See the slurm.conf man page for more information.
 #
-ControlMachine=slurmclustermaster-main
+ControlMachine=slurmclustermaster
 #ControlAddr=
 #BackupController=
 #BackupAddr=
@@ -155,16 +155,15 @@ SlurmdLogFile=/var/log/slurm-llnl/slurmd.log
 #SuspendRate=
 #SuspendTime=
 #
-# Must add controller node explictly but don't place it into any partition
-NodeName=slurmclustermaster-main CPUs=1 State=UNKNOWN
-#NodeName=partitiona-instrument CPUs=1 State=UNKNOWN
-#NodeName=partitionb-instrument CPUs=1 State=UNKNOWN
-#NodeName=cris-instrument CPUs=1 State=UNKNOWN
+# Must add controller node explicitly but don't place it into any partition
+NodeName=slurmclustermaster CPUs=1 State=UNKNOWN
+#
 # COMPUTE NODES
-NodeName=slurmclusterworker-one CPUs=1 State=UNKNOWN
-#NodeName=slurmclusterworker-two CPUs=1 State=UNKNOWN
-PartitionName=partition1 Nodes=slurmclusterworker-one MaxTime=INFINITE State=UP
-#PartitionName=partition2 Nodes=slurmclusterworker-two MaxTime=INFINITE State=UP
+NodeName=slurmclusterworker CPUs=1 State=UNKNOWN
+#NodeName=slurmclusterworker-multi-one CPUs=1 State=UNKNOWN
+#NodeName=slurmclusterworker-multi-two CPUs=1 State=UNKNOWN
+PartitionName=partition1 Nodes=slurmclusterworker MaxTime=INFINITE State=UP
+#PartitionName=partition2 Nodes=slurmclusterworker-multi-one,slurmclusterworker-multi-two MaxTime=INFINITE State=UP
 
 
 
diff --git a/services/standaloneworker/Dockerfile b/services/standaloneworker/Dockerfile
new file mode 100755
index 0000000..66c9815
--- /dev/null
+++ b/services/standaloneworker/Dockerfile
@@ -0,0 +1,36 @@
+FROM quay.io/podman/stable:v3.2.3
+
+# This is necessary to work around permission errors in the base image.
+RUN chown -R podman:podman /home/podman
+
+# Rename the podman user and group to testuser and set up its home directory
+RUN usermod -l testuser podman
+RUN usermod -d /home/testuser testuser
+RUN ln -s /home/podman /home/testuser
+RUN groupmod -n testuser podman
+
+# Replace the subordinate uid/gid mappings so they refer to testuser instead of podman
+COPY subuid /etc/subuid
+COPY subgid /etc/subgid
+
+#RUN dnf repolist 
+#RUN dnf update --refresh
+RUN dnf install -y docker singularity openssh-server
+RUN ssh-keygen -A
+RUN mkdir -p /home/testuser/.ssh && chown testuser:testuser /home/testuser/.ssh
+COPY --chown=testuser:testuser keys/id_rsa.pub /home/testuser/.ssh/authorized_keys
+RUN dnf install -y python wget
+
+
+#----------------------
+# Entrypoint
+#----------------------
+
+# Copy entrypoint
+COPY entrypoint.sh /
+
+# Make the entrypoint executable
+RUN chmod 755 /entrypoint.sh
+
+# Set entrypoint
+ENTRYPOINT ["/entrypoint.sh"]
\ No newline at end of file
diff --git a/services/standaloneworker/entrypoint.sh b/services/standaloneworker/entrypoint.sh
new file mode 100644
index 0000000..fd2f047
--- /dev/null
+++ b/services/standaloneworker/entrypoint.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Exit on any error. More complex handling could be added in the future
+# (see https://stackoverflow.com/questions/4381618/exit-a-script-on-error)
+set -e
+
+# Fix FUSE permissions
+chmod 777 /dev/fuse
+
+#---------------------
+#  Entrypoint command
+#---------------------
+
+if [[ $# -eq 0 ]] ; then
+    echo "[INFO] Executing Docker entrypoint command: /usr/sbin/sshd -D"
+    /usr/sbin/sshd -D
+else
+    echo -n "[INFO] Executing Docker entrypoint command: "
+    echo "$@"
+    # Quoted "$@" preserves each argument, so commands with arguments run correctly
+    exec "$@"
+fi
+#exec sudo -i -u testuser /bin/bash -c "$*"
diff --git a/services/standaloneworker/keys/id_rsa.pub b/services/standaloneworker/keys/id_rsa.pub
new file mode 100644
index 0000000..9a0504b
--- /dev/null
+++ b/services/standaloneworker/keys/id_rsa.pub
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC2n4wiLiRmE1sla5+w0IW3wwPW/mqhhkm7IyCBS+rGTgnts7xsWcxobvamNdD6KSLNnjFZbBb7Yaf/BvWrwQgdqIFVU3gRWHYzoU6js+lKtBjd0e2DAVGivWCKEkSGLx7zhx7uH/Jt8kyZ4NaZq0p5+SFHBzePdR/1rURd8G8+G3OaCPKqP+JQT4RMUQHC5SNRJLcK1piYdmhDiYEyuQG4FlStKCWLCXeUY2EVirNMeQIfOgbUHJsVjH07zm1y8y7lTWDMWVZOnkG6Ap5kB+n4l1eWbslOKgDv29JTFOMU+bvGvYZh70lmLK7Hg4CMpXVgvw5VF9v97YiiigLwvC7wasBHaASwH7wUqakXYhdGFxJ23xVMSLnvJn4S++4L8t8bifRIVqhT6tZCPOU4fdOvJKCRjKrf7gcW/E33ovZFgoOCJ2vBLIh9N9ME0v7tG15JpRtgIBsCXwLcl3tVyCZJ/eyYMbc3QJGsbcPGb2CYRjDbevPCQlNavcMdlyrNIke7VimM5aW8OBJKVh5wCNRpd9XylrKo1cZHYxu/c5Lr6VUZjLpxDlSz+IuTn4VE7vmgHNPnXdlxRKjLHG/FZrZTSCWFEBcRoSa/hysLSFwwDjKd9nelOZRNBvJ+NY48vA8ixVnk4WAMlR/5qhjTRam66BVysHeRcbjJ2IGjwTJC5Q== rosetta@rosetta.platform
diff --git a/services/standaloneworker/subgid b/services/standaloneworker/subgid
new file mode 100644
index 0000000..171e002
--- /dev/null
+++ b/services/standaloneworker/subgid
@@ -0,0 +1 @@
+testuser:10000:5000
\ No newline at end of file
diff --git a/services/standaloneworker/subuid b/services/standaloneworker/subuid
new file mode 100644
index 0000000..171e002
--- /dev/null
+++ b/services/standaloneworker/subuid
@@ -0,0 +1 @@
+testuser:10000:5000
\ No newline at end of file
diff --git a/services/webapp/code/rosetta/core_app/computing_managers.py b/services/webapp/code/rosetta/core_app/computing_managers.py
index 37d1406..3660ddf 100644
--- a/services/webapp/code/rosetta/core_app/computing_managers.py
+++ b/services/webapp/code/rosetta/core_app/computing_managers.py
@@ -288,7 +288,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingMana
                         binds += ' -v{}:{}'.format(expanded_base_path, expanded_bind_path)
             
             # TODO: remove this hardcoding
-            prefix = 'sudo' if computing_host == 'slurmclusterworker-one' else ''
+            prefix = 'sudo' if computing_host == 'slurmclusterworker' else ''
             
             run_command  = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_keys.private_key_file, computing_user, computing_host)
             run_command += '/bin/bash -c \'"rm -rf /tmp/{}_data && mkdir /tmp/{}_data && chmod 700 /tmp/{}_data && '.format(task.uuid, task.uuid, task.uuid) 
@@ -334,7 +334,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingMana
             internal_stop_command = 'kill -9 {}'.format(task.id)            
         elif container_runtime=='docker':
             # TODO: remove this hardcoding
-            prefix = 'sudo' if computing_host == 'slurmclusterworker-one' else ''
+            prefix = 'sudo' if computing_host == 'slurmclusterworker' else ''
             internal_stop_command = '{} docker stop {} && {} docker rm {}'.format(prefix,task.id,prefix,task.id)
         else:
             raise NotImplementedError('Container runtime {} not supported'.format(container_runtime))
@@ -368,7 +368,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingMana
             internal_view_log_command = 'cat /tmp/{}_data/task.log'.format(task.uuid)            
         elif container_runtime=='docker':
             # TODO: remove this hardcoding
-            prefix = 'sudo' if computing_host == 'slurmclusterworker-one' else ''
+            prefix = 'sudo' if computing_host == 'slurmclusterworker' else ''
             internal_view_log_command = '{} docker logs {}'.format(prefix,task.id)
         else:
             raise NotImplementedError('Container runtime {} not supported'.format(container_runtime))
diff --git a/services/webapp/code/rosetta/core_app/management/commands/core_app_populate.py b/services/webapp/code/rosetta/core_app/management/commands/core_app_populate.py
index b3ac3be..754565b 100644
--- a/services/webapp/code/rosetta/core_app/management/commands/core_app_populate.py
+++ b/services/webapp/code/rosetta/core_app/management/commands/core_app_populate.py
@@ -275,11 +275,11 @@ to provide help, news and informations on your deployment. Or you can just ignor
                                                                  access_mode = 'ssh+cli',
                                                                  auth_mode = 'user_keys',
                                                                  wms = None,
-                                                                 conf = {'host': 'slurmclusterworker-one'},
-                                                                 container_runtimes = ['singularity'])
+                                                                 conf = {'host': 'standaloneworker'},
+                                                                 container_runtimes = ['singularity','podman'])
     
             # Add testuser extra conf for this computing resource
-            testuser.profile.add_extra_conf(conf_type = 'computing_user', object=demo_singlenode_computing, value= 'slurmtestuser')
+            testuser.profile.add_extra_conf(conf_type = 'computing_user', object=demo_singlenode_computing, value= 'testuser')
 
             #  Demo cluster computing plus conf
             demo_slurm_computing = Computing.objects.create(name = 'Demo Cluster',
@@ -288,7 +288,7 @@ to provide help, news and informations on your deployment. Or you can just ignor
                                                             access_mode = 'ssh+cli',
                                                             auth_mode = 'user_keys',
                                                             wms = 'slurm',
-                                                            conf = {'host': 'slurmclustermaster-main', 'default_partition': 'partition1'},
+                                                            conf = {'host': 'slurmclustermaster', 'default_partition': 'partition1'},
                                                             container_runtimes = ['singularity'])
            
             # Add testuser extra conf for this computing resource
-- 
GitLab