基于 shell 的简单好用的多进程 wrapper。
其中的 run_single_task.sh 是真实的任务负载:
# Per-piece workload: run_single_task.sh receives the path of one input piece
# and the path of the output file it should produce. Its stdout and stderr are
# captured in separate files next to that output.
sh run_single_task.sh "${tmp_files}/${file_basename}" "${OUTPUT_PATH}/${file_basename}.out" \
  1>"${OUTPUT_PATH}/${file_basename}.stdout" \
  2>"${OUTPUT_PATH}/${file_basename}.stderr"
通过切分数据的方式,直接用 shell 就可以进行多进程处理,可以参考以下过程:
#!/usr/bin/env bash
#
# Multi-process wrapper: splits INPUT_FILE by lines into at most NUM_PROC
# pieces, runs run_single_task.sh on every piece in parallel, then
# concatenates the per-piece outputs into OUTPUT_PATH/total.out.
#
# Usage:   bash <this script> INPUT_FILE OUTPUT_PATH NUM_PROC
#   INPUT_FILE   line-oriented input file to split
#   OUTPUT_PATH  directory receiving part-NNNNN.{out,stdout,stderr} and total.out
#   NUM_PROC     maximum number of parallel worker processes (positive integer)
#
# Exit status: 0 on success, 1 if a worker or the final merge failed,
#              2 on invalid arguments.

INPUT_FILE=${1:?usage: INPUT_FILE OUTPUT_PATH NUM_PROC}
OUTPUT_PATH=${2:?usage: INPUT_FILE OUTPUT_PATH NUM_PROC}
NUM_PROC=${3:?usage: INPUT_FILE OUTPUT_PATH NUM_PROC}
tmp_files=".tmp"

# NUM_PROC is used as a divisor below, so reject 0 and non-numeric values early.
if ! [[ "${NUM_PROC}" =~ ^[1-9][0-9]*$ ]]; then
  printf 'NUM_PROC must be a positive integer, got: %s\n' "${NUM_PROC}" >&2
  exit 2
fi
if [[ ! -r "${INPUT_FILE}" ]]; then
  printf 'cannot read input file: %s\n' "${INPUT_FILE}" >&2
  exit 2
fi

mkdir -p -- "${tmp_files}" "${OUTPUT_PATH}" || exit 1
# Drop pieces left over from a previous run so they are not processed again.
rm -f -- "${tmp_files}"/part-*

# awk's NR also counts a final line that lacks a trailing newline, and its
# output has no padding (unlike `wc -l` on BSD/macOS).
total_lines=$(awk 'END { print NR }' "${INPUT_FILE}") || exit 1
if (( total_lines == 0 )); then
  # Nothing to split; `split -l 0` would fail. Produce an empty result.
  : > "${OUTPUT_PATH}/total.out"
  exit 0
fi

# Ceiling division: lines per piece so that at most NUM_PROC pieces are made.
piece=$(( (total_lines + NUM_PROC - 1) / NUM_PROC ))
split -l "${piece}" -d -a 5 -- "${INPUT_FILE}" "${tmp_files}/part-" || exit 1

# Collect the pieces with a glob instead of parsing `ls`; the zero-padded
# numeric suffixes make the glob's lexical order equal to the input order.
shopt -s nullglob
READ_LIST=("${tmp_files}"/part-*)
shopt -u nullglob

pids=()
out_files=()
for filename in "${READ_LIST[@]}"; do
  file_basename=${filename##*/}
  echo "${file_basename}"
  echo "${filename}"
  nohup sh run_single_task.sh "${tmp_files}/${file_basename}" "${OUTPUT_PATH}/${file_basename}.out" \
    1>"${OUTPUT_PATH}/${file_basename}.stdout" \
    2>"${OUTPUT_PATH}/${file_basename}.stderr" &
  pids+=("$!")
  out_files+=("${OUTPUT_PATH}/${file_basename}.out")
done

# Wait for each worker individually so a failing one is noticed.
failed=0
for pid in "${pids[@]}"; do
  if ! wait "${pid}"; then
    printf 'worker with pid %s failed\n' "${pid}" >&2
    failed=1
  fi
done

# Merge only the outputs of this run, in input order. A quoted "*.out" would
# never expand, and an unquoted one would also match total.out itself and
# stale outputs from earlier runs.
cat -- "${out_files[@]}" > "${OUTPUT_PATH}/total.out" || failed=1
exit "${failed}"