When you work on an HPC cluster and have to deal with a large number of jobs and folders, clear and straightforward management is essential.
Folders & Files format
To improve teamwork, researchers should agree on a common naming format for their jobs and paths.
Here is an example folder layout:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
Project_Folder
|-- Temperature_1/
|   |-- vc_pressure_1/
|   |   |-- vc_pressure_1.in
|   |   |-- vc_pressure_1.out
|   |-- vc_pressure_2/
|   |   |-- vc_pressure_2.in
|   |   |-- vc_pressure_2.out
|   |-- scf_pressure_1/
|   |   |-- scf_pressure_1.in
|   |   |-- scf_pressure_1.out
|   |   |-- ph_pressure_1.in
|   |   |-- ph_pressure_1.out
|   |-- scf_pressure_2/
|   |   |-- scf_pressure_2.in
|   |   |-- scf_pressure_2.out
|   |   |-- ph_pressure_2.in
|   |   |-- ph_pressure_2.out
Cancel your jobs
Create the script file
Copy the following script.
A quick way to create the file is:
1
2
touch ~/cancel_job.py
vim ~/cancel_job.py
and paste the content into ~/cancel_job.py.
Use the script to cancel jobs
Run python ~/cancel_job.py to cancel all the current jobs under your user ID.
To cancel one job, you can specify your job name by running python ~/cancel_job.py job_name
If you have several jobs that share the same prefix and you want to cancel all of them, you can run python ~/cancel_job.py job_prefix
Script:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import subprocess
import re
import os
import sys
#--------------------------------------------------#
#             where you can customize              #
#--------------------------------------------------#
# NOTE: "#USRNAME" below is a placeholder -- presumably it has to be replaced
# with your own cluster user name before the script is used (TODO confirm).
# NOTE(review): squeue's default output may truncate the NAME column; if long
# job names must be matched, an explicit output format may be needed -- confirm
# on your cluster.
fn = "/rigel/home/#USRNAME/tmp/sq.tmp" # scratch file that temporarily holds the job queue listing
order = "squeue -u #USRNAME" # shell command that lists all the jobs of this user
cancel = "scancel" # shell command that cancels a job; the job id is appended to it
#--------------------------------------------------#
#                   color format                   #
#--------------------------------------------------#
class tmcolors:
    """ANSI escape sequences used to style text printed to a terminal."""
    # reset: switches off whatever styling is currently active
    ENDC = '\033[0m'
    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # foreground colours (SGR codes 91-95)
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
class color:
    """
    Wrap a value so it can be rendered with a terminal style.

    Every property returns the stored content formatted as a string,
    preceded by one escape code from ``tmcolors`` and followed by
    ``tmcolors.ENDC`` so the styling does not leak into later output.
    """

    def __init__(self, content):
        self.content = content

    def _styled(self, code):
        # One place for the "<code><content><reset>" layout shared by all styles.
        return "%s%s%s" % (code, self.content, tmcolors.ENDC)

    @property
    def header(self):
        return self._styled(tmcolors.HEADER)

    @property
    def warning(self):
        return self._styled(tmcolors.WARNING)

    @property
    def bold(self):
        return self._styled(tmcolors.BOLD)

    @property
    def green(self):
        return self._styled(tmcolors.OKGREEN)

    @property
    def blue(self):
        return self._styled(tmcolors.OKBLUE)

    @property
    def underline(self):
        return self._styled(tmcolors.UNDERLINE)
#--------------------------------------------------#
#                  main functions                  #
#--------------------------------------------------#
def get_mysq():
    '''
    Return the output of the queue-listing command as a list of lines.

    The shell command stored in ``order`` is run with its standard output
    redirected into the scratch file ``fn``; the file is then read back and
    removed again.

    Returns:
        list of str -- the lines as given by ``readlines()`` (line endings
        kept). The caller treats the first line as a header row.

    Raises:
        IOError / OSError -- if ``fn`` cannot be created or read, e.g.
        because its parent directory does not exist.
    '''
    try:
        # Opening with "w" truncates any stale copy of the file. Doing the
        # redirection from Python keeps ``fn`` out of the shell command line,
        # so a path containing spaces or shell metacharacters still works.
        with open(fn, "w") as sink:
            subprocess.call(order, shell=True, stdout=sink)
        with open(fn, "r") as f:
            return f.readlines()
    finally:
        # Always clean up, even when reading failed. As with ``rm -f``, a
        # file that is already gone is not an error.
        try:
            os.remove(fn)
        except OSError:
            pass
def rua(argus, job_name):
    '''
    Decide whether a job has to be cancelled.

    argus    : the full command-line argument list (``sys.argv``); element 0
               is the script path, the remaining elements are the patterns
               given by the user.
    job_name : name of the job taken from the queue listing.

    Returns True when no pattern was given (cancel everything) or when at
    least one pattern is found in ``job_name``; otherwise False.

    Each pattern is interpreted as a regular expression and may match
    anywhere in the name, so a plain job name or prefix also selects jobs
    that merely contain it. An invalid regular expression raises ``re.error``.
    '''
    # argus[0] is the script path, not a user pattern. Matching it against
    # job names could cancel unrelated jobs (e.g. "job.py" matches "job_py")
    # or raise re.error when the path contains regex metacharacters.
    patterns = argus[1:]
    if not patterns:
        return True
    return any(re.search(pattern, job_name) for pattern in patterns)
    
if __name__ == "__main__":
    # Cancel the user's queued jobs. With no command-line arguments every
    # listed job is cancelled; otherwise only the jobs selected by rua().
    lines = get_mysq()
    argu = sys.argv
    if len(lines) > 1:
        # lines[0] is treated as the header row of the listing and skipped.
        for line in lines[1:]:
            fields = line.split()
            if len(fields) < 3:
                # Blank or truncated row: there is no job id / name to act on.
                continue
            # Assumes the listing's first column is the job id and the third
            # is the job name -- TODO confirm if `order` uses a custom format.
            job_id, job_name = fields[0], fields[2]
            if not rua(argu, job_name):
                continue
            status = subprocess.call("%s %s" % (cancel, job_id), shell=True)
            if status == 0:
                print("%s is cancelled!" % color(job_name).header)
            else:
                # Do not claim success when the cancel command reported an error.
                print(color("%s could not be cancelled (exit status %s)"
                            % (job_name, status)).warning)
    else:
        print(color("No job now!").warning)