awk
教學網站
- https://linuxhandbook.com/awk-command-tutorial/
- Running Awk in parallel to process 256M records
- Awk one-liners
搜尋字串
awk '/^this/{print $0}' #與 sed -n '/^this/p' 相同
搜尋取代
# Remove every semicolon from each line of the input and print the result.
# The awk program must be single-quoted so the shell does not try to parse
# the braces and parentheses itself; `print` is required to emit the line.
awk '{ gsub(/;/, ""); print }' file
移除重複資料的行
我們經常使用 sort 與 uniq 指令,從檔案中找出並移除重複項目。不過如果你不希望原始檔被排序或更動,這時正是 awk 派上用場的時候:awk 可以依照原本的順序截取不重複的記錄,並儲存到新的檔案中。下例以整行內容 $0 作為陣列 x 的索引:某一行第一次出現時 x[$0] 為 0,!x[$0]++ 為真,該行被印出;之後再出現時計數已大於 0,因此被略過。
awk '!x[$0]++' filewithdupes > newfile
搜尋含 disabled 的行,並列出第 1, 3 欄的內容
awk '/disabled/{print $1, $3}'
取值做計算
awk '{print "up " $1 /60 " minutes"}' /proc/uptime
df -lP -text4 |awk '{sum += $4} END {printf "%d GiB\n", sum/1048576}'
# Same total as above, written with the POSIX exponent operator `^`;
# `**` is an extension that some awk implementations (e.g. mawk) reject.
df -lP -t ext4 | awk '{sum += $4} END {printf "%d GiB\n", sum/2^20}'
加上判斷式
df -k |grep "/dev/" | awk '($2 > 0 && ((1 - $3/$2) > 0.9) ) {print $0 }'
awk -F" " '{print ($7 != "A")?$0"***":$0}' myfile
搜尋每行的第9欄,如果不是 0x00000000 時就顯示該行訊息
cat info.out | awk '($9 != "0x00000000") {print}'
列出 uid >= 500 且 <= 10000 的行
export UGIDLIMIT=500
awk -v LIMIT=$UGIDLIMIT -F: '($3>=LIMIT) && ($3<=10000)' /etc/passwd
解決長整數顯示問題
注意:awk 以雙精度浮點數儲存數值,超過 2^53 的整數只能近似表示,因此下列兩種方法雖然避免了科學記號,但輸出的尾數仍與原值不同。
$ awk 'BEGIN {print 12345678901234567890}'
1.23457e+19
方法一
$ awk 'BEGIN {printf("%d\n", 12345678901234567890)}'
12345678901234567168
方法二
$ awk 'BEGIN {OFMT="%.0f"; print 12345678901234567890}'
12345678901234567168
列出 uid=0 的帳號
awk -F: '($3 == "0") {print}' /etc/passwd
列出最後一個欄位的值
ls -ltd */ | awk -F ' ' '{print $NF}'
列出長度大於 64 的行
awk 'length > 64'
格式化輸出
awk '{ printf("1-minute: %s\n5-minute: %s\n15-minute: \
%s\n",$1,$2,$3); }' /proc/loadavg
計算目錄的檔案大小
# Print the total size, in bytes, of all regular files under a directory tree.
# Arguments: $1 - directory to scan
# Outputs:   the byte total on stdout, or "<arg>: folder does not exist"
#            when $1 is not an existing directory
foldersize() {
  # "$1" is quoted so paths containing spaces work, and so that an empty or
  # missing argument fails the test instead of collapsing to `[ -d ]`, which
  # is always true and would make the function scan "/".
  if [ -d "$1" ]; then
    # In `ls -l` output, lines for regular files start with "-" and the
    # fifth field is the size; `tot + 0` prints 0 when no file was found.
    ls -alRF -- "$1/" | awk '/^-/ { tot += $5 } END { print tot + 0 }'
  else
    echo "$1: folder does not exist"
  fi
}
計算單字總數(以空白字元,即空格或 Tab,作為單字的分隔)
awk '{total=total+NF}; END {print total+0}'
搜尋特定字串的文字段落內容
# lspci -v | awk '/ATI/,/^$/'
01:03.0 VGA compatible controller: ATI Technologies Inc Rage XL (rev 27) (prog-if 00 [VGA])
Subsystem: Compaq Computer Corporation: Unknown device 001e
Flags: bus master, stepping, medium devsel, latency 64
Memory at fc000000 (32-bit, non-prefetchable) [size=16M]
I/O ports at 3000 [size=256]
Memory at fbff0000 (32-bit, non-prefetchable) [size=4K]
Capabilities: [5c] Power Management version 2
批次 Kill 名稱包含有 /plugins/mactrack 的程式
# The [/] bracket expression still matches "/plugins/mactrack", but grep's own
# command line now contains "[/]plugins/mactrack", which the pattern does not
# match, so grep's PID is no longer handed to kill.
ps -ef | grep "[/]plugins/mactrack" | awk '{system("kill " $2);}'
使用兩個不同的區隔符號: 空格 + =
# 顯示回應時間
ping 8.8.8.8 | awk -F[\ =] '{print $10}'
取出每十行的資料(第 10, 20, 30, ...)
awk '!(NR % 10)' file
自動清理舊檔案,保留最近一個檔案
# List the nmon files newest first and delete all but the most recent one:
ls -t ~/*.nmon |awk '/\.nmon/ {if (NR >1){system ("rm " $1)}}'
CSV 指定欄位值
- GoAWK - A POSIX-compliant AWK interpreter written in Go, with CSV support
# 統計第 13 欄 APPNAME 每個值的計數
cat ./ISO27001/db2/fdctest_validate.csv | awk -F, '{a[$13]++} END {for (k in a) print k, a[k]}'
# 變更欄位 3 的值
awk '{ $3 = toupper(substr($3,1,1)) substr($3,2) } $3' FS=, OFS=, file
# Convert field 3 of a comma-separated file to upper case.
# -F, / OFS=, keep the CSV delimiter on input and output; the NF guard avoids
# padding short lines with empty fields; the trailing `1` prints every line.
awk -F, -v OFS=, 'NF >= 3 { $3 = toupper($3) } 1' file
清除空白行
awk NF test.txt
段落文字的解析
script: aud2csv.sh
Raw Data:
timestamp=2023-01-08-23.13.02.322992;
category=CHECKING;
audit event=CHECKING_OBJECT;
event correlator=107;
event status=0;
database=RPTDB;
userid=winmfg;
authid=WINMFG;
application id=10.8.25.30.64020.230108151301;
application name=EXCEL.EXE;
package schema=NULLID;
package name=SYSSH200;
package section=4;
object schema=ISTRPT;
object name=FHOPEHS;
object type=TABLE;
access approval reason=OBJECT;
access attempted=SELECT;
local transaction id=0x00000001b6550792;
global transaction id=0x0000000000000000000000000000000000000000;
instance name=istrpt;
hostname=BSMDB_B;
函式說明:
# aud2csv: turn paragraph-style "key=value;" audit records into CSV rows.
#
# Input:  records separated by blank lines; each line holds one "key=value;"
#         pair, so after splitting, odd-numbered fields are keys and
#         even-numbered fields are values.
# Param:  nfp2 - required number of fields per record. It is not assigned
#         anywhere in this program, so the caller has to supply it
#         (e.g. awk -v nfp2=44 ... for 22 key/value pairs).
# Output: one header row built from the keys of the first accepted record,
#         followed by one data row per accepted record.

# Paragraph mode: an empty RS makes every blank-line-separated block one
# record, and newline then separates fields in addition to FS ("=").
BEGIN {
    FS = "="
    RS = ""
}

# Only records with exactly nfp2 fields are converted; the input contains
# records with inconsistent field counts, which are skipped.
NF == nfp2 {
    # Remove every ";" from the record; modifying $0 re-splits the fields.
    gsub(/;/, "")

    # Header row: emitted once, from the keys (fields 1, 3, 5, ...).
    if (!header_done) {
        row = $1
        for (i = 3; i <= NF; i += 2)
            row = row "," $i
        print row
        header_done = 1
    }

    # Data row: the values (fields 2, 4, 6, ...).
    row = $2
    for (i = 4; i <= NF; i += 2)
        row = row "," $i
    print row
}
Cheatsheet
No Comments