计算过程基本数据流方向: DISK -> RAM -> Cache -> CPU |
|
|
|
Linux系统结构: |
|
|
|
|
|
IO重定向:<, >, <<, >>
|
|
|
|
|
|
|
exit status -> $?: 上一条命令的退出状态保存在 $? 中 (0 表示成功, 非 0 表示失败)
set -o pipefail: 管道默认只返回最后一个命令的退出状态; 开启后, 只要任一阶段失败, 整个管道就返回非零状态
# algc7 - the "allergic to seven" counter, used to demonstrate how a failing
# stage in the middle of a pipeline affects the pipeline's exit status.
#
# Inputs:  lines of text on stdin.
# Outputs: every line read before the first line that contains the character
#          "7", written unchanged to stdout. The offending line is not printed.
# Exit:    calls `exit 1` as soon as a line containing "7" is read. `exit`
#          ends the shell the function runs in; as a pipeline stage that is
#          the stage's own subshell. If no line contains "7", the function
#          returns normally once stdin is exhausted.
algc7() {
  local num
  # IFS= and -r keep each line byte-for-byte (no trimming, no backslash
  # processing); the `|| [[ -n ... ]]` clause still handles a final line
  # that has no trailing newline.
  while IFS= read -r num || [[ -n "$num" ]]; do
    [[ "$num" == *"7"* ]] && exit 1
    # printf instead of echo: safe for values such as "-n" and for globs.
    printf '%s\n' "$num"
  done
}
# Baseline: both stages succeed, so this prints 10 and then status 0.
seq 10 | wc -l
echo "$?"

# algc7 exits 1 when it reads 7, so wc counts 6 lines. Without pipefail the
# pipeline reports only the status of its last stage (wc), so this prints 0
# and the failure goes unnoticed.
seq 10 | algc7 | wc -l
echo "$?"

# With pipefail enabled the same pipeline still prints 6, but its status is
# now 1 because a stage failed.
set -o pipefail
seq 10 | algc7 | wc -l
echo "$?"
set -o nounset (等价于 set -u, 引用未定义的变量时报错退出) 和 set -o errexit (等价于 set -e, 未被检查的命令返回非零状态时立即退出)
或者显式地进行错误退出检查:
# ... earlier steps of the script ...
# ... the command whose result matters runs immediately before this check ...
# $? holds the status of the most recent command only, so nothing else may
# run between that command and the test below.
if [ "$?" -ne 0 ]; then
  echo "[ERROR]: Some error msg" >&2
  exit 1
fi
检查输出结果合法性
# 文件是否存在且非空 (即是否有输出)
if [ -s "$file" ]; then ...; fi
# 变量是否为空 (必须写成 "$var"; 只写 var 时测试的是字面字符串 "var", 结果永远是非空)
if [ -z "$var" ]; then ...; fi
if [ ! -n "$var" ]; then ...; fi
绝大多数生信数据格式都是可压缩的, 尽量存储为压缩格式:
sam -> bam/cram & index
fasta/fastq -> bgzip & tabix
bed/gff/gtf/vcf -> bgzip & tabix
txt -> gzip
中间文件在流程中就要处理掉
定时备份数据