Commit e4100990 authored by Stefano Alberto Russo

Added the standalone computing resource service with Podman, Docker and Singularity support. Improved demo slurm cluster naming.
parent 76b3ad5a
 version: '3'
 services:

-  slurmclustermaster-main:
+  slurmclustermaster:
     image: "rosetta/slurmclustermaster"
-    container_name: slurmclustermaster-main
-    hostname: slurmclustermaster-main
+    container_name: slurmclustermaster
+    hostname: slurmclustermaster
     environment:
       - SAFEMODE=False
     privileged: true
@@ -12,10 +12,10 @@ services:
       - ./data/shared:/shared
       # - ./data/singularity_cache:/rosetta/.singularity/cache # Not working, check permissions...

-  slurmclusterworker-one:
+  slurmclusterworker:
     image: "rosetta/slurmclusterworker"
-    container_name: slurmclusterworker-one
-    hostname: slurmclusterworker-one
+    container_name: slurmclusterworker
+    hostname: slurmclusterworker
     environment:
       - SAFEMODE=False
     privileged: true
@@ -23,6 +23,15 @@ services:
       - ./data/shared:/shared
       - /var/run/docker.sock:/var/run/docker.sock

+  standaloneworker:
+    image: "rosetta/standaloneworker"
+    container_name: standaloneworker
+    hostname: standaloneworker
+    privileged: true
+    volumes:
+      - ./data/shared:/shared
+      - /var/run/docker.sock:/var/run/docker.sock
+
   dregistry:
     container_name: dregistry
     hostname: dregistry
......
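A quick way to check the new service is to bring it up on its own and look for the three runtimes inside it (a minimal sketch, assuming the Compose file above is used as-is from its own directory):

# Start only the standalone computing resource and verify its container runtimes
docker-compose up -d standaloneworker
docker exec standaloneworker sh -c 'podman --version && docker --version && singularity --version'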
@@ -34,6 +34,7 @@ if [[ "x$SERVICE" == "x" ]] ; then
     $BUILD_COMMAND services/slurmcluster -t rosetta/slurmcluster
     $BUILD_COMMAND services/slurmclustermaster -t rosetta/slurmclustermaster
     $BUILD_COMMAND services/slurmclusterworker -t rosetta/slurmclusterworker
+    $BUILD_COMMAND services/standaloneworker -t rosetta/standaloneworker
     $BUILD_COMMAND services/dregistry -t rosetta/dregistry
     $BUILD_COMMAND services/webapp -t rosetta/webapp
     $BUILD_COMMAND services/postgres -t rosetta/postgres
......
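The new image can also be built on its own; a rough manual equivalent of the added line, assuming $BUILD_COMMAND expands to a plain docker build:

# Hypothetical manual build, mirroring the added $BUILD_COMMAND line
docker build services/standaloneworker -t rosetta/standaloneworker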
@@ -2,7 +2,7 @@
 # Put this file on all nodes of your cluster.
 # See the slurm.conf man page for more information.
 #
-ControlMachine=slurmclustermaster-main
+ControlMachine=slurmclustermaster
 #ControlAddr=
 #BackupController=
 #BackupAddr=
@@ -155,16 +155,15 @@ SlurmdLogFile=/var/log/slurm-llnl/slurmd.log
 #SuspendRate=
 #SuspendTime=
 #
-# Must add controller node explictly but don't place it into any partition
-NodeName=slurmclustermaster-main CPUs=1 State=UNKNOWN
-#NodeName=partitiona-instrument CPUs=1 State=UNKNOWN
-#NodeName=partitionb-instrument CPUs=1 State=UNKNOWN
-#NodeName=cris-instrument CPUs=1 State=UNKNOWN
+# Must add controller node explicitly but don't place it into any partition
+NodeName=slurmclustermaster CPUs=1 State=UNKNOWN
+#
 # COMPUTE NODES
-NodeName=slurmclusterworker-one CPUs=1 State=UNKNOWN
-#NodeName=slurmclusterworker-two CPUs=1 State=UNKNOWN
-PartitionName=partition1 Nodes=slurmclusterworker-one MaxTime=INFINITE State=UP
-#PartitionName=partition2 Nodes=slurmclusterworker-two MaxTime=INFINITE State=UP
+NodeName=slurmclusterworker CPUs=1 State=UNKNOWN
+#NodeName=slurmclusterworker-multi-one CPUs=1 State=UNKNOWN
+#NodeName=slurmclusterworker-multi-two CPUs=1 State=UNKNOWN
+PartitionName=partition1 Nodes=slurmclusterworker MaxTime=INFINITE State=UP
+#PartitionName=partition2 Nodes=slurmclusterworker-multi-one,slurmclusterworker-multi-two MaxTime=INFINITE State=UP
......
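Once the renamed configuration is deployed on both nodes, standard Slurm commands can confirm that the controller and the worker register under the new names (a sanity check, assuming slurmctld and slurmd are running in the demo cluster):

sinfo -p partition1                      # partition1 should report the node "slurmclusterworker"
scontrol show node slurmclusterworker    # detailed state of the renamed compute node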
FROM quay.io/podman/stable:v3.2.3
# This is necessary due to some base image permission errors.
RUN chown -R podman:podman /home/podman
# Change user
RUN usermod -l testuser podman
RUN usermod -d /home/testuser testuser
RUN ln -s /home/podman /home/testuser
RUN groupmod -n testuser podman
# Replace uid/gid mapping from podman to testuser user
COPY subuid /etc/subuid
COPY subgid /etc/subgid
#RUN dnf repolist
#RUN dnf update --refresh
RUN dnf install -y docker singularity openssh-server
RUN ssh-keygen -A
RUN mkdir /home/testuser/.ssh
COPY keys/id_rsa.pub /home/testuser/.ssh/authorized_keys
RUN dnf install -y python wget
#----------------------
# Entrypoint
#----------------------
# Copy entrypoint
COPY entrypoint.sh /
# Give right permissions
RUN chmod 755 /entrypoint.sh
# Set entrypoint
ENTRYPOINT ["/entrypoint.sh"]
\ No newline at end of file
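Since the image only installs the runtimes plus an SSH daemon, access goes through the testuser account and the public key baked in above. A minimal access check, assuming the matching private key is kept as keys/id_rsa in the build context and the client runs on the same Compose network (the hostname standaloneworker does not resolve from the host):

# Hypothetical check from another container on the Compose network
ssh -i keys/id_rsa -o StrictHostKeyChecking=no testuser@standaloneworker 'podman --version && singularity --version'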
#!/bin/bash
# Exit on any error. More complex thing could be done in future
# (see https://stackoverflow.com/questions/4381618/exit-a-script-on-error)
set -e
# Fix FUSE permissions
chmod 777 /dev/fuse
#---------------------
# Entrypoint command
#---------------------
if [[ "x$@" == "x" ]] ; then
    echo "[INFO] Executing Docker entrypoint command: /usr/sbin/sshd -D"
    /usr/sbin/sshd -D
else
    ENTRYPOINT_COMMAND=$@
    echo -n "[INFO] Executing Docker entrypoint command: "
    echo $ENTRYPOINT_COMMAND
    # Left unquoted on purpose: the command string must be word-split so that
    # its arguments are passed to exec as separate words.
    exec $ENTRYPOINT_COMMAND
fi
#exec sudo -i -u testuser /bin/bash -c "$ENTRYPOINT_COMMAND"
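With this entrypoint the container starts sshd when no arguments are given, and otherwise execs the passed command; for example (a sketch, assuming the image is tagged as in the build script, with --privileged so that /dev/fuse is available):

docker run --privileged rosetta/standaloneworker                          # no arguments: runs /usr/sbin/sshd -D
docker run --privileged rosetta/standaloneworker singularity --version    # arguments: the given command is exec'd instead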
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC2n4wiLiRmE1sla5+w0IW3wwPW/mqhhkm7IyCBS+rGTgnts7xsWcxobvamNdD6KSLNnjFZbBb7Yaf/BvWrwQgdqIFVU3gRWHYzoU6js+lKtBjd0e2DAVGivWCKEkSGLx7zhx7uH/Jt8kyZ4NaZq0p5+SFHBzePdR/1rURd8G8+G3OaCPKqP+JQT4RMUQHC5SNRJLcK1piYdmhDiYEyuQG4FlStKCWLCXeUY2EVirNMeQIfOgbUHJsVjH07zm1y8y7lTWDMWVZOnkG6Ap5kB+n4l1eWbslOKgDv29JTFOMU+bvGvYZh70lmLK7Hg4CMpXVgvw5VF9v97YiiigLwvC7wasBHaASwH7wUqakXYhdGFxJ23xVMSLnvJn4S++4L8t8bifRIVqhT6tZCPOU4fdOvJKCRjKrf7gcW/E33ovZFgoOCJ2vBLIh9N9ME0v7tG15JpRtgIBsCXwLcl3tVyCZJ/eyYMbc3QJGsbcPGb2CYRjDbevPCQlNavcMdlyrNIke7VimM5aW8OBJKVh5wCNRpd9XylrKo1cZHYxu/c5Lr6VUZjLpxDlSz+IuTn4VE7vmgHNPnXdlxRKjLHG/FZrZTSCWFEBcRoSa/hysLSFwwDjKd9nelOZRNBvJ+NY48vA8ixVnk4WAMlR/5qhjTRam66BVysHeRcbjJ2IGjwTJC5Q== rosetta@rosetta.platform
testuser:10000:5000
\ No newline at end of file
testuser:10000:5000
\ No newline at end of file
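These two files give testuser a block of 5000 subordinate UIDs and GIDs starting at 10000, which rootless Podman needs for its user namespace. A quick way to inspect the resulting mapping (a sketch, to be run as testuser inside the standaloneworker container):

# Shows the user-namespace mapping derived from /etc/subuid and /etc/subgid
podman unshare cat /proc/self/uid_map /proc/self/gid_map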
@@ -288,7 +288,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingMana
                binds += ' -v{}:{}'.format(expanded_base_path, expanded_bind_path)

        # TODO: remove this hardcoding
-        prefix = 'sudo' if computing_host == 'slurmclusterworker-one' else ''
+        prefix = 'sudo' if computing_host == 'slurmclusterworker' else ''

        run_command = 'ssh -o LogLevel=ERROR -i {} -4 -o StrictHostKeyChecking=no {}@{} '.format(computing_keys.private_key_file, computing_user, computing_host)
        run_command += '/bin/bash -c \'"rm -rf /tmp/{}_data && mkdir /tmp/{}_data && chmod 700 /tmp/{}_data && '.format(task.uuid, task.uuid, task.uuid)
@@ -334,7 +334,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingMana
            internal_stop_command = 'kill -9 {}'.format(task.id)
        elif container_runtime=='docker':
            # TODO: remove this hardcoding
-            prefix = 'sudo' if computing_host == 'slurmclusterworker-one' else ''
+            prefix = 'sudo' if computing_host == 'slurmclusterworker' else ''
            internal_stop_command = '{} docker stop {} && {} docker rm {}'.format(prefix,task.id,prefix,task.id)
        else:
            raise NotImplementedError('Container runtime {} not supported'.format(container_runtime))
@@ -368,7 +368,7 @@ class SSHStandaloneComputingManager(StandaloneComputingManager, SSHComputingMana
            internal_view_log_command = 'cat /tmp/{}_data/task.log'.format(task.uuid)
        elif container_runtime=='docker':
            # TODO: remove this hardcoding
-            prefix = 'sudo' if computing_host == 'slurmclusterworker-one' else ''
+            prefix = 'sudo' if computing_host == 'slurmclusterworker' else ''
            internal_view_log_command = '{} docker logs {}'.format(prefix,task.id)
        else:
            raise NotImplementedError('Container runtime {} not supported'.format(container_runtime))
......
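Put together, the run command assembled by SSHStandaloneComputingManager is roughly of this shape (an illustrative sketch with placeholder values, not the exact string produced by the code):

# <task_uuid> and the key path are placeholders; the real values come from the task and computing_keys objects
ssh -o LogLevel=ERROR -i <private_key_file> -4 -o StrictHostKeyChecking=no testuser@standaloneworker \
    '/bin/bash -c "rm -rf /tmp/<task_uuid>_data && mkdir /tmp/<task_uuid>_data && chmod 700 /tmp/<task_uuid>_data && ..."'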
@@ -275,11 +275,11 @@ to provide help, news and informations on your deployment. Or you can just ignor
                access_mode = 'ssh+cli',
                auth_mode = 'user_keys',
                wms = None,
-                conf = {'host': 'slurmclusterworker-one'},
-                container_runtimes = ['singularity'])
+                conf = {'host': 'standaloneworker'},
+                container_runtimes = ['singularity','podman'])

        # Add testuser extra conf for this computing resource
-        testuser.profile.add_extra_conf(conf_type = 'computing_user', object=demo_singlenode_computing, value= 'slurmtestuser')
+        testuser.profile.add_extra_conf(conf_type = 'computing_user', object=demo_singlenode_computing, value= 'testuser')

        # Demo cluster computing plus conf
        demo_slurm_computing = Computing.objects.create(name = 'Demo Cluster',
@@ -288,7 +288,7 @@ to provide help, news and informations on your deployment. Or you can just ignor
                access_mode = 'ssh+cli',
                auth_mode = 'user_keys',
                wms = 'slurm',
-                conf = {'host': 'slurmclustermaster-main', 'default_partition': 'partition1'},
+                conf = {'host': 'slurmclustermaster', 'default_partition': 'partition1'},
                container_runtimes = ['singularity'])

        # Add testuser extra conf for this computing resource
......