Linux tips
From silico.biotoul.fr
m (→sed, grep, cut, find, wc, sort) |
m (→Processes) |
||
Line 337: | Line 337: | ||
= Processes = | = Processes = | ||
- | ps, | + | <source lang='bash'> |
+ | # list processes. The 1st column is the PID (process id) which can be used to send signals | ||
+ | $ ps faux | less | ||
+ | |||
+ | # top processes, hit M or P to sort by memory or CPU, q to exit | ||
+ | # physical memory used is the RES column (RESident) | ||
+ | $ top | ||
+ | |||
+ | # launch a program in background with & | ||
+ | $ longtask & | ||
+ | # you can run multiple commands in parallel: | ||
+ | $ cmd1 & cmd2 & cmd3 | ||
+ | $ date & ls | ||
+ | [1] 8271 | ||
+ | Tue Mar 5 17:15:57 CET 2013 | ||
+ | heart.txt.orange.tab knn.13.loocv knn.17.loocv knn.20.loocv knn.5.loocv knn.9.loocv NaiveBayes.py | ||
+ | knn.10.loocv knn.14.loocv knn.18.loocv knn.2.loocv knn.6.loocv knn.py sample-heart.tab | ||
+ | [1]+ Done date | ||
+ | |||
+ | # if you forget the & | ||
+ | # it is possible to stop the process (in the foreground) with control+Z | ||
+ | $ sleep 9999 | ||
+ | ^Z | ||
+ | [1]+ Stopped sleep 9999 | ||
+ | $ sleep 1239999 | ||
+ | ^Z | ||
+ | [2]+ Stopped sleep 1239999 | ||
+ | # list of running jobs | ||
+ | $ jobs | ||
+ | [1]- Stopped sleep 9999 | ||
+ | [2]+ Stopped sleep 1239999 | ||
+ | # put job 1 in the foreground | ||
+ | $ fg 1 | ||
+ | $ fg 1 | ||
+ | sleep 9999 | ||
+ | ^Z | ||
+ | [1]+ Stopped sleep 9999 | ||
+ | # put it in the background | ||
+ | $ bg 1 | ||
+ | [1]+ sleep 9999 & | ||
+ | $ jobs | ||
+ | [1]- Running sleep 9999 & | ||
+ | [2]+ Stopped sleep 1239999 | ||
+ | $ fg 2 | ||
+ | ^C | ||
+ | $ ps | ||
+ | PID TTY TIME CMD | ||
+ | 8342 pts/0 00:00:00 sleep | ||
+ | 8465 pts/0 00:00:00 ps | ||
+ | 10318 pts/0 00:00:00 bash | ||
+ | # kill a process by its PID | ||
+ | $ kill 8342 | ||
+ | [1]+ Terminated sleep 9999 | ||
+ | # by default, the kill command asks the process to terminate (SIGTERM) | ||
+ | # but sometimes the process is not listening (e.g. it is stopped) | ||
+ | $ sleep 888888 | ||
+ | ^Z | ||
+ | ^Z | ||
+ | [1]+ Stopped sleep 888888 | ||
+ | [barriot@gamborimbo TP-Classification]$ ps | ||
+ | PID TTY TIME CMD | ||
+ | 8513 pts/0 00:00:00 sleep | ||
+ | 8520 pts/0 00:00:00 ps | ||
+ | 10318 pts/0 00:00:00 bash | ||
+ | $ kill 8513 | ||
+ | $ ps | ||
+ | PID TTY TIME CMD | ||
+ | 8513 pts/0 00:00:00 sleep | ||
+ | 8535 pts/0 00:00:00 ps | ||
+ | 10318 pts/0 00:00:00 bash | ||
+ | $ jobs | ||
+ | [1]+ Stopped sleep 888888 | ||
+ | # nothing happened because the process is stopped and thus cannot listen and respond to our request (until it is running again) | ||
+ | # to kill such a process, we have to send a SIGKILL (9) instead of the default SIGTERM (15) | ||
+ | $ kill -9 8513 | ||
+ | [1]+ Killed sleep 888888 | ||
+ | # this way we ask the system to kill the process instead of asking the process itself to terminate | ||
+ | |||
+ | # list of signals | ||
+ | $ kill -l | ||
+ | 1) SIGHUP 2) SIGINT 3) SIGQUIT 4) SIGILL 5) SIGTRAP | ||
+ | 6) SIGABRT 7) SIGBUS 8) SIGFPE 9) SIGKILL 10) SIGUSR1 | ||
+ | 11) SIGSEGV 12) SIGUSR2 13) SIGPIPE 14) SIGALRM 15) SIGTERM | ||
+ | 16) SIGSTKFLT 17) SIGCHLD 18) SIGCONT 19) SIGSTOP 20) SIGTSTP | ||
+ | 21) SIGTTIN 22) SIGTTOU 23) SIGURG 24) SIGXCPU 25) SIGXFSZ | ||
+ | 26) SIGVTALRM 27) SIGPROF 28) SIGWINCH 29) SIGIO 30) SIGPWR | ||
+ | 31) SIGSYS 34) SIGRTMIN 35) SIGRTMIN+1 36) SIGRTMIN+2 37) SIGRTMIN+3 | ||
+ | 38) SIGRTMIN+4 39) SIGRTMIN+5 40) SIGRTMIN+6 41) SIGRTMIN+7 42) SIGRTMIN+8 | ||
+ | 43) SIGRTMIN+9 44) SIGRTMIN+10 45) SIGRTMIN+11 46) SIGRTMIN+12 47) SIGRTMIN+13 | ||
+ | 48) SIGRTMIN+14 49) SIGRTMIN+15 50) SIGRTMAX-14 51) SIGRTMAX-13 52) SIGRTMAX-12 | ||
+ | 53) SIGRTMAX-11 54) SIGRTMAX-10 55) SIGRTMAX-9 56) SIGRTMAX-8 57) SIGRTMAX-7 | ||
+ | 58) SIGRTMAX-6 59) SIGRTMAX-5 60) SIGRTMAX-4 61) SIGRTMAX-3 62) SIGRTMAX-2 | ||
+ | 63) SIGRTMAX-1 64) SIGRTMAX | ||
+ | |||
+ | # notice SIGTSTP (sent by Ctrl-Z; SIGSTOP is the variant that cannot be caught or ignored) and SIGCONT (sent by fg or bg) | ||
+ | |||
+ | # it is possible to kill all processes having a given name | ||
+ | $ killall annoying_process | ||
+ | |||
+ | # when you run a command in the shell, the shell is its parent process | ||
+ | # if you log in to a remote host to run a very long analysis and get disconnected | ||
+ | # the remote shell dies and all of its children also die | ||
+ | # to prevent this behavior, it is possible to run the command | ||
+ | $ nohup ./long_analysis & | ||
+ | |||
+ | # nohup stands for no hang up. If you forgot the nohup, it is possible to achieve the same as follows: | ||
+ | $ sleep 999999999 | ||
+ | ^Z | ||
+ | [1]+ Stopped sleep 999999999 | ||
+ | $ bg | ||
+ | [1]+ sleep 999999999 & | ||
+ | $ ps | ||
+ | PID TTY TIME CMD | ||
+ | 8878 pts/0 00:00:00 sleep | ||
+ | 8883 pts/0 00:00:00 ps | ||
+ | 10318 pts/0 00:00:00 bash | ||
+ | $ disown -h 8878 | ||
+ | </source> | ||
= shell = | = shell = |
Revision as of 16:34, 5 March 2013
Contents |
Paths & I/O & files
Linux filesystem organization
/ | The root directory. |
/boot | Boot directory (kernel and boot loader) |
/etc | Configuration files for the system. e.g. /etc/fstab specifies which drives to mount where. /etc/hosts lists network hosts and IP addresses. |
/bin /usr/bin | The /bin directory has the essential programs that the system requires to operate, while /usr/bin contains applications for the system's users. |
/sbin /usr/sbin | The sbin directories contain programs for system administration, mostly for use by the superuser (root). |
/usr | contains things that support user applications. |
/usr/local | /usr/local and its subdirectories (bin, lib, share, ...) are used for the installation of software and other files for use on the local machine i.e., not part of the official distribution. |
/var | contains files that change as the system is running. This includes: log (logs!), spool (files that are queued for some process, such as mail messages and print jobs) |
/lib /lib64 | shared libraries (similar to DLLs of windows) |
/home | users personal directories |
/root | System administrator's home directory |
/tmp | holds temporary files (anybody/program can write) |
/dev | In linux, devices are represented by files under that directory (e.g. disks are block devices such as /dev/sda or /dev/hda usually for the 1st hard drive) |
/proc | virtual directory giving access to the running kernel and system. e.g. /proc/cpuinfo /proc/meminfo /proc/uptime |
/media /run/media /mnt | removable devices (usb sticks, usb drives, ...) are usually mounted in one of those when plugged |
Paths & directories: pwd, mkdir, rmdir, rm
- pwd returns current directory
- relative to current directory: e.g. ls subdir/subsubdir or ls ../whatever/
- absolute ls ~user/path or ls /home/user/path
- mkdir: create directory. e.g. mkdir ~/newdir or with subdirs mkdir -p ~/new/newsub/newsubsub
- rmdir dirname or if not empty rm -fr dirname
Permissions: chown, chgrp, chmod
$ ls -l /home drwxr-x--- 69 barriot gsi 4.0K Mar 5 12:09 barriot drwx------ 2 root root 16K Jul 12 2010 lost+found drwxr-xr-x 36 micas stage 4.0K Jul 31 2012 micas ... [barriot@gamborimbo ~]$ ls -lh Documents/TEACHING/2012-2013/M1-MABS/Graph/TP3-igraph.layout/ total 80K drwxr-xr-x 1 barriot gsi 4.0K Mar 14 2012 HDE.old -rw-r--r-- 1 barriot gsi 24K Mar 14 2012 91347.nwk -rw-r--r-- 1 barriot gsi 942 Mar 1 16:02 Cleandb_Luca_1_S_1_1_65_Iso_Tr_1-CC1.cod -rw-r--r-- 1 barriot gsi 28K Sep 7 2010 Cleandb_Luca_1_S_1_1_65_Iso_Tr_1-CC1.gr -rw-r--r-- 1 barriot gsi 2.3K Sep 7 2010 Cleandb_Luca_1_S_1_1_65_Iso_Tr_1-CC1.tgr -rw-r--r-- 1 barriot gsi 4.7K Mar 5 11:42 cmds.R -rw-r--r-- 1 barriot gsi 871 Mar 14 2012 sample_tree_with_branchlengths.nwk -rwxr-xr-x 1 barriot gsi 670 Mar 14 2012 drawTree.py -rw-r--r-- 1 barriot gsi 5.6K Feb 27 16:57 Tree.py
The first character corresponds to the file type: d for a directory, - for a regular file, ... The next nine characters are three groups of three (rwx), giving the permissions of the owner (user), the group and the others, in that order.
For a regular file :
- r for permission to read
- w for permission to modify
- x for being able to execute the file (binary executable or script)
For a directory :
- r to be able to read the content (list files in the directory)
- w to be able to add or remove files
- x to be able to pass through that directory, i.e. cd to that dir or a subdir
Modify ownership of a file or directory :
# change owner chown newuser file # recursive chown -R newuser directory # change group chgrp newgroup filename # change both (owner:group) chown newuser:newgroup filename
Modify permissions:
# numeric notation: r=4, w=2, x=1, thus for rwxr-x--- (7=rwx, 5=r-x, 0=---) chmod 750 file # recursively on a sub directory chmod -R 750 dirname # symbolic notation: chmod u=rwx,g=rx,o= filename # add execute permission for all: chmod a+x filename # revoke write permission for others: chmod o-w filename
File info & type: stat, file
[barriot@gamborimbo ~]$ stat /home/barriot File: `/home/barriot' Size: 12288 Blocks: 24 IO Block: 4096 directory Device: fd02h/64770d Inode: 1048577 Links: 119 Access: (0755/drwxr-xr-x) Uid: ( 500/ barriot) Gid: ( 501/ gsi) Access: 2013-03-05 10:39:08.927051453 +0100 Modify: 2013-03-05 10:39:00.240074369 +0100 Change: 2013-03-05 10:39:00.240074369 +0100 Birth: -
[barriot@gamborimbo ~]$ stat .bashrc File: `.bashrc' Size: 517 Blocks: 8 IO Block: 4096 regular file Device: fd02h/64770d Inode: 1052239 Links: 1 Access: (0755/-rwxr-xr-x) Uid: ( 500/ barriot) Gid: ( 501/ gsi) Access: 2013-03-02 16:04:19.268619379 +0100 Modify: 2012-10-12 17:24:24.818899216 +0200 Change: 2012-11-18 23:25:18.869870338 +0100 Birth: -
[barriot@gamborimbo ~]$ file /home/barriot /home/barriot: directory
[barriot@gamborimbo ~]$ file .bashrc .bashrc: ASCII text
File content, concatenation, split, ... and redirections: cat, split, head, tail, more, less, tac
# display content cat somefile.txt # concatenate 2 or more files cat file_1.txt file_2.txt cat *.txt # redirect to a file (if the file exists it will be overwritten, otherwise it gets created) cat file_1.txt file_2.txt > result.txt # redirect to a file (if the file exists the output is appended at its end, otherwise it gets created) cat others*.txt >> result.txt # split a file into smaller parts ## by file size (1kb) split --bytes 1024 big.file split -b 1024 big.file ## by number of lines per output file split --lines 100 big.text.file.txt split -l 100 big.text.file.txt ## by number of output files split --number 10 big.file split -n 10 big.file ## specify output files prefix and numbered numerically (3 digits) split -n 100 -a 3 -d big.file part_ split -n 100 --suffix-length 3 --numeric-suffixes big.file part_ # display the lines of a file in reverse order tac file.txt # first 10 lines of files head -n 10 *.txt # last 10 lines tail -n 10 *.txt # last lines of a file and keeps outputting new lines added to the file tail -f /var/log/httpd/error.log # content of a file page by page (space for next page, enter for next line) more file.txt # content of a file: page up/down to browse. /expr to search (then n for next match and N for previous match). q to exit less file.txt
grep, cut, sort, wc, find
- grep
To find files containing some string or regular expression:
grep myWeirdFunctionName *.cpp
Or recursively:
grep -r myWeirdFunctionName *
To display at what line it is found:
grep -n myWeirdFunctionName myweirdlibrary.cpp
- cut
To display only some columns: Media:Data_Mining_heart.txt
$ head Data_Mining_heart.txt age sex chest_pain_type resting_blood_pressure serum_cholesterol fasting_blood_sugar resting_ecg_results max_heart_rate exercise_induced_anginadepression_induced slope major_vessels thal disease continuous discrete discrete continuous continuous discrete discrete continuous discrete continuous continuous continuous discrete discrete class 70 M 4 130 322 FALSE 2 109 FALSE 2.4 2 3 normal TRUE 67 F 3 115 564 FALSE 2 160 FALSE 1.6 2 0 reversable_defect FALSE 57 M 2 124 261 FALSE 0 141 FALSE 0.3 1 0 reversable_defect TRUE 64 M 4 128 263 FALSE 0 105 TRUE 0.2 2 1 reversable_defect FALSE 74 F 2 120 269 FALSE 2 121 TRUE 0.2 1 1 normal FALSE 65 M 4 120 177 FALSE 0 140 FALSE 0.4 1 0 reversable_defect FALSE 56 M 3 130 256 TRUE 2 142 TRUE 0.6 2 1 fixed_defect TRUE $ head heart.txt.orange.tab | cut -f 1,4 age resting_blood_pressure continuous continuous 70 130 67 115 57 124 64 128 74 120 65 120 56 130 # sometimes we need to specify the character delimiting the columns $ tail clinical_info.csv "X86A40";12.17;;"F";"Uppsala";"F";61;"F";24;"G1" "X87A79";12.08;;"T";"Uppsala";"T";36;"T";12;"G2" "X88A67";4.25;"F";"T";"Uppsala";"T";63;"T";24;"G3" "X89A64";12.08;;"T";"Uppsala";"T";60;"T";23;"G1" "X8B87";11.33;;"T";"Uppsala";"T";58;"T";17;"G2" "X90A63";2.67;;"T";"Uppsala";"T";76;"T";26;"G3" "X94A16";11.08;;"T";"Uppsala";"T";73;"T";6;"G2" "X96A21";0.08;"F";"T";"Uppsala";"T";63;"T";38;"G3" "X99A50";10.5;;"T";"Uppsala";"T";82;"F";19;"G2" "X9B52";11.33;;"T";"Uppsala";"T";71;"T";12;"G3" $ tail clinical_info.csv | cut -f 10 -d';' "G1" "G2" "G3" "G1" "G2" "G3" "G2" "G3" "G2" "G3"
- sort, wc
# sort thal values $ head Data_Mining_heart.txt | cut -f 13 | sort discrete fixed_defect normal normal reversable_defect reversable_defect reversable_defect reversable_defect thal # remove duplicates $ cat Data_Mining_heart.txt | cut -f 13 | sort -u discrete fixed_defect normal reversable_defect thal # number of lines, words, characters (in that order) $ wc *.loocv 281 1941 14174 knn.10.loocv 281 1941 14154 knn.11.loocv 281 1941 14171 knn.12.loocv 281 1941 14174 knn.13.loocv 281 1941 14150 knn.14.loocv 281 1941 14167 knn.15.loocv 281 1941 14150 knn.16.loocv 281 1941 14166 knn.17.loocv 281 1941 14172 knn.18.loocv 281 1941 14161 knn.19.loocv 281 1941 14194 knn.1.loocv 281 1941 14179 knn.20.loocv 281 1941 14153 knn.2.loocv 281 1941 14162 knn.3.loocv 281 1941 14166 knn.4.loocv 281 1941 14149 knn.5.loocv 281 1941 14164 knn.6.loocv 281 1941 14159 knn.7.loocv 281 1941 14184 knn.8.loocv 281 1941 14148 knn.9.loocv 281 1941 14210 NaiveBayes.loocv 5901 40761 297507 total # number of unique values of thal $ cat Data_Mining_heart.txt | cut -f 13 | sort -u | wc -l 6
- find
find lets you filter files and directories based on various attributes:
- name/pattern
- date/age
- size
- type
- permissions
- and others...
# find files by name recursively starting from the current directory (quote the pattern so the shell does not expand it) find ./ -name 'what*I*am*looking*' # by time accessed (amin in minutes or atime in days), changed (cmin in minutes or ctime in days), modified (mmin in minutes or mtime in days) ## accessed less than 10 minutes ago find ./ -amin -10 ## changed more than 1 hour ago find ./ -cmin +60
- sed
To replace something (e.g. jamaica.biotoul.fr) by somethingelse (e.g. jamaica.ibcg.biotoul.fr) in a file:
sed -i 's/jamaica.biotoul.fr/jamaica.ibcg.biotoul.fr/g' gsiwikidb.after_sed.sql
Remove sequence limits from Jalview output:
sed -i 's/\/[0-9]*-[0-9]*//' CleanupFile_slimites.fa
To replace from a file to a file:
sed 's/jamaica.biotoul.fr/jamaica.ibcg.biotoul.fr/g' < gsiwikidb.before_sed.sql > gsiwikidb.after_sed.sql
To apply that to a set of files using find:
find /var/www -type f -exec sed -i 's/jamaica.biotoul.fr/jamaica.ibcg.biotoul.fr/g' {} \;
(The trailing \; marks the end of the command given to -exec; the backslash keeps the shell from interpreting the semicolon itself.)
To apply that to a set of files returned by grep:
files=$(grep -R silico * | grep -v .svn | cut -f 1 -d':') for i in $files; do sed -i 's/silico.biotoul.fr/jamaica.ibcg.biotoul.fr/g' $i; done
Processes
# list processes. The 1st column is the PID (process id) which can be used to send signals $ ps faux | less # top processes, hit M or P to sort by memory or CPU, q to exit # physical memory used is the RES column (RESident) $ top # launch a program in background with & $ longtask & # you can run multiple commands in parallel: $ cmd1 & cmd2 & cmd3 $ date & ls [1] 8271 Tue Mar 5 17:15:57 CET 2013 heart.txt.orange.tab knn.13.loocv knn.17.loocv knn.20.loocv knn.5.loocv knn.9.loocv NaiveBayes.py knn.10.loocv knn.14.loocv knn.18.loocv knn.2.loocv knn.6.loocv knn.py sample-heart.tab [1]+ Done date # if you forget the & # it is possible to stop the process (in the foreground) with control+Z $ sleep 99999 ^Z [1]+ Stopped sleep 99999 $ sleep 1239999 ^Z [2]+ Stopped sleep 1239999 # list of running jobs $ jobs [1]- Stopped sleep 9999 [2]+ Stopped sleep 1239999 # put job 1 in the foreground $ fg 1 $ fg 1 sleep 9999 ^Z [1]+ Stopped sleep 9999 # put it in the background $ bg 1 [1]+ sleep 9999 & $ jobs [1]- Running sleep 9999 & [2]+ Stopped sleep 1239999 $ fg 2 ^C $ ps PID TTY TIME CMD 8342 pts/0 00:00:00 sleep 8465 pts/0 00:00:00 ps 10318 pts/0 00:00:00 bash # kill a process by its PID $ kill 8342 [1]+ Terminated sleep 9999 # by default, the kill command asks the process to stop # but sometimes the process is not listening (e.g. 
it is stopped) $ sleep 888888 ^Z ^Z [1]+ Stopped sleep 888888 [barriot@gamborimbo TP-Classification]$ ps PID TTY TIME CMD 8513 pts/0 00:00:00 sleep 8520 pts/0 00:00:00 ps 10318 pts/0 00:00:00 bash $ kill 8513 $ ps PID TTY TIME CMD 8513 pts/0 00:00:00 sleep 8535 pts/0 00:00:00 ps 10318 pts/0 00:00:00 bash $ jobs [1]+ Stopped sleep 888888 # nothing happened because the process is stopped and thus cannot listen and respond to our request (until it is running again) # to kill such a process, we have to send a SIGKILL (9) instead of the default SIGTERM (15) $ kill -9 8513 [1]+ Killed sleep 888888 # this way we ask the system to kill the process instead of asking the process itself to terminate # list of signals $ kill -l 1) SIGHUP 2) SIGINT 3) SIGQUIT 4) SIGILL 5) SIGTRAP 6) SIGABRT 7) SIGBUS 8) SIGFPE 9) SIGKILL 10) SIGUSR1 11) SIGSEGV 12) SIGUSR2 13) SIGPIPE 14) SIGALRM 15) SIGTERM 16) SIGSTKFLT 17) SIGCHLD 18) SIGCONT 19) SIGSTOP 20) SIGTSTP 21) SIGTTIN 22) SIGTTOU 23) SIGURG 24) SIGXCPU 25) SIGXFSZ 26) SIGVTALRM 27) SIGPROF 28) SIGWINCH 29) SIGIO 30) SIGPWR 31) SIGSYS 34) SIGRTMIN 35) SIGRTMIN+1 36) SIGRTMIN+2 37) SIGRTMIN+3 38) SIGRTMIN+4 39) SIGRTMIN+5 40) SIGRTMIN+6 41) SIGRTMIN+7 42) SIGRTMIN+8 43) SIGRTMIN+9 44) SIGRTMIN+10 45) SIGRTMIN+11 46) SIGRTMIN+12 47) SIGRTMIN+13 48) SIGRTMIN+14 49) SIGRTMIN+15 50) SIGRTMAX-14 51) SIGRTMAX-13 52) SIGRTMAX-12 53) SIGRTMAX-11 54) SIGRTMAX-10 55) SIGRTMAX-9 56) SIGRTMAX-8 57) SIGRTMAX-7 58) SIGRTMAX-6 59) SIGRTMAX-5 60) SIGRTMAX-4 61) SIGRTMAX-3 62) SIGRTMAX-2 63) SIGRTMAX-1 64) SIGRTMAX # notice SIGTSTP (sent by Ctrl-Z; SIGSTOP is the variant that cannot be caught or ignored) and SIGCONT (sent by fg or bg) # it is possible to kill all processes having a given name $ killall annoying_process # when you run a command in the shell, the shell is its parent process # if you log in to a remote host to run a very long analysis and get disconnected # the remote shell dies and all of its children also die # to prevent this behavior, it is possible to run the command $ nohup ./long_analysis & # nohup stands 
for no hang up. If you forgot the nohup, it is possible to achieve the same as follows: $ sleep 999999999 ^Z [1]+ Stopped sleep 999999999 $ bg [1]+ sleep 999999999 & $ ps PID TTY TIME CMD 8878 pts/0 00:00:00 sleep 8883 pts/0 00:00:00 ps 10318 pts/0 00:00:00 bash $ disown -h 8878
shell
variables, test, $?, for, if, function, $(cmd), && ||
Archive
tar, bzip, gzip, rsync
Network
ssh, scp, nslookup, wget, nmap, ping
Misc
diff, diffuse, mount, df, du, alternatives, lsof