Skip to main content

awk

教學網站

搜尋字串

awk '/^this/{print $0}'         #與 sed -n '/^this/p' 相同

搜尋取代

# Remove every semicolon from each input line and print the result.
# The awk program must be single-quoted so the shell does not try to parse
# the braces and parentheses; ";" needs no backslash inside a regex.
awk '{ gsub(/;/, ""); print }'

移除重複資料的行

我們經常使用 sort 與 uniq 指令,從檔案中找出並移除重複項目。不過,如果不希望原始檔被排序或更動,就可以改用 awk 擷取不重複的記錄並存到新檔案:`!x[$0]++` 只有在某一行第一次出現時為真,因此每行只會輸出一次,且保留原本的順序。

awk '!x[$0]++' filewithdupes > newfile

搜尋含 disabled 的行,並列出第 1, 3 欄的內容

awk '/disabled/{print $1, $3}'

取值做計算

awk '{print "up " $1 /60 " minutes"}' /proc/uptime

df -lP -text4 |awk '{sum += $4} END {printf "%d GiB\n", sum/1048576}'
df -lP -text4 |awk '{sum += $4} END {printf "%d GiB\n", sum/2**20}'

加上判斷式

df -k |grep "/dev/" | awk '($2 > 0 && ((1 - $3/$2)  > 0.9) ) {print $0 }'

awk -F" " '{print ($7 != "A")?$0"***":$0}' myfile

搜尋每行的第9欄,如果不是 0x00000000 時就顯示該行訊息

cat info.out | awk '($9 != "0x00000000") {print}'

列出 uid >= 500 且 <= 10000 的行

export UGIDLIMIT=500
awk -v LIMIT=$UGIDLIMIT -F: '($3>=LIMIT) && ($3<=10000)' /etc/passwd

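解決長整數顯示問題(注意:awk 以雙精度浮點數儲存數值,下列方法只改變輸出格式;超過 2^53 的整數仍會失去精度,例如下方輸出的末三位數已與輸入不同。若需要完整精度,可使用支援 MPFR 的 gawk 並加上 -M 選項)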

$ awk  'BEGIN {print 12345678901234567890}'
1.23457e+19

方法一
$ awk  'BEGIN {printf("%d\n", 12345678901234567890)}'
12345678901234567168

方法二
$ awk  'BEGIN {OFMT="%.0f"; print 12345678901234567890}'
12345678901234567168 

列出 uid=0 的帳號

awk -F: '($3 == "0") {print}' /etc/passwd

列出最後一個欄位的值

ls -ltd */ | awk -F ' ' '{print $NF}'

列出長度大於 64 的行

awk 'length > 64'

格式化輸出

awk '{ printf("1-minute: %s\n5-minute: %s\n15-minute: \
 %s\n",$1,$2,$3); }' /proc/loadavg

計算目錄的檔案大小

#######################################
# Print the total size in bytes of all regular files under a directory,
# recursing into subdirectories.
# Arguments: $1 - directory to measure
# Outputs:   the byte total on stdout, or "<arg>: folder does not exist"
#            when $1 is not a directory
#######################################
foldersize() {
    # "$1" is quoted so that paths containing spaces or glob characters work
    # and so that an empty argument is rejected instead of making the test
    # degenerate to `[ -d ]`, which is always true.
    if [ -d "$1" ]; then
        # "--" stops a leading "-" in the path from being read as an option.
        # In the long listing, lines starting with "-" are regular files and
        # field 5 is the size; "tot + 0" prints 0 when there are no files.
        ls -alRF -- "$1/" | awk '/^-/ { tot += $5 } END { print tot + 0 }'
    else
        printf '%s: folder does not exist\n' "$1"
    fi
}

計算單字總數(以符號 "空格" 作為單字的識別)

awk '{total=total+NF}; END {print total+0}'

搜尋特定字串的文字段落內容

# lspci -v | awk '/ATI/,/^$/'
01:03.0 VGA compatible controller: ATI Technologies Inc Rage XL (rev 27) (prog-if 00 [VGA])
        Subsystem: Compaq Computer Corporation: Unknown device 001e
        Flags: bus master, stepping, medium devsel, latency 64
        Memory at fc000000 (32-bit, non-prefetchable) [size=16M]
        I/O ports at 3000 [size=256]
        Memory at fbff0000 (32-bit, non-prefetchable) [size=4K]
        Capabilities: [5c] Power Management version 2

批次 Kill 名稱包含有 /plugins/mactrack 的程式

# Kill every process whose command line contains /plugins/mactrack.
# Writing the pattern as "[/]plugins/mactrack" keeps grep from matching its
# own entry in the ps output, so kill is only called for real targets.
# Field 2 of `ps -ef` is the PID.
ps -ef | grep "[/]plugins/mactrack" | awk '{ system("kill " $2) }'

使用兩個不同的區隔符號: 空格 + =

# 顯示回應時間
ping 8.8.8.8 | awk -F[\ =] '{print $10}'

每隔十行取出一行(第 10, 20, 30, ... 行)

awk '!(NR % 10)' file

自動清理舊檔案,保留最近一個檔案

# List the nmon files newest first and delete every one except the most recent:
ls -t ~/*.nmon |awk '/\.nmon/ {if (NR >1){system ("rm " $1)}}'

CSV 指定欄位值

  • GoAWK - A POSIX-compliant AWK interpreter written in Go, with CSV support
# 統計第 13 欄 APPNAME 每個值的計數
cat ./ISO27001/db2/fdctest_validate.csv |  awk -F, '{a[$13]++} END {for (k in a) print k, a[k]}'

# 變更欄位 3 的值 
awk '{ $3 = toupper(substr($3,1,1)) substr($3,2) } $3' FS=, OFS=, file

# Convert field 3 of a comma-separated file to uppercase.
# FS/OFS are set to "," so the line is split and rebuilt as CSV; the trailing
# `$3` pattern prints only the records whose third field is non-empty.
awk '{ $3 = toupper($3) } $3' FS=, OFS=, file

清除空白行

awk NF test.txt

段落文字的解析

script: aud2csv.sh

Raw Data:

timestamp=2023-01-08-23.13.02.322992;
  category=CHECKING;
  audit event=CHECKING_OBJECT;
  event correlator=107;
  event status=0;
  database=RPTDB;
  userid=winmfg;
  authid=WINMFG;
  application id=10.8.25.30.64020.230108151301;
  application name=EXCEL.EXE;
  package schema=NULLID;
  package name=SYSSH200;
  package section=4;
  object schema=ISTRPT;
  object name=FHOPEHS;
  object type=TABLE;
  access approval reason=OBJECT;
  access attempted=SELECT;
  local transaction id=0x00000001b6550792;
  global transaction id=0x0000000000000000000000000000000000000000;
  instance name=istrpt;
  hostname=BSMDB_B;

函式說明:

# Convert paragraph-style "key=value;" audit records into CSV.
#
# An empty RS selects paragraph mode: records are separated by blank lines.
# Fields are split on "="; in paragraph mode a newline also always acts as a
# field separator, so every "key=value;" line contributes two fields
# (key at an odd position, value at the following even position).
#
# Variables:
#   nfp2 - expected number of fields per record (e.g. -v nfp2=44 for the
#          22-line sample record). When it is not set, no filtering is done.
BEGIN {
    FS = "="
    RS = ""

    # Field numbers of the values (even positions), named after their keys.
    TIMESTAMP=2; CATEGORY=4; AUDIT_EVENT=6; EVENT_CORRELATOR=8; EVENT_STATUS=10; DATABASE=12; USERID=14; AUTHID=16; APPLICATION_ID=18; APPLICATION_NAME=20; PACKAGE_SCHEMA=22; PACKAGE_NAME=24; PACKAGE_SECTION=26; OBJECT_SCHEMA=28; OBJECT_NAME=30; OBJECT_TYPE=32; ACCESS_APPROVAL_REASON=34; ACCESS_ATTEMPTED=36; LOCAL_TRANSACTION_ID=38; GLOBAL_TRANSACTION_ID=40; INSTANCE_NAME=42; HOSTNAME=44

    # Field numbers of the keys (odd positions), used to build the header row.
    f1=1; f2=3; f3=5; f4=7; f5=9; f6=11; f7=13; f8=15; f9=17; f10=19; f11=21; f12=23; f13=25; f14=27; f15=29; f16=31; f17=33; f18=35; f19=37; f20=39; f21=41; f22=43
}

# The input contains records with inconsistent field counts, so skip every
# record whose field count differs from nfp2 (only when nfp2 was supplied).
nfp2 != "" && NF != nfp2 { next }

{
    # Remove the semicolons; changing $0 makes awk re-split the fields.
    gsub(/;/, "")

    # Print the CSV header once, built from the key names of the first
    # record that passes the filter.
    if (! headline) {
        headline = sprintf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s", $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13, $f14, $f15, $f16, $f17, $f18, $f19, $f20, $f21, $f22)
        print headline
    }

    # Print the CSV data row for this record.
    dataline = sprintf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s", $TIMESTAMP, $CATEGORY, $AUDIT_EVENT, $EVENT_CORRELATOR, $EVENT_STATUS, $DATABASE, $USERID, $AUTHID, $APPLICATION_ID, $APPLICATION_NAME, $PACKAGE_SCHEMA, $PACKAGE_NAME, $PACKAGE_SECTION, $OBJECT_SCHEMA, $OBJECT_NAME, $OBJECT_TYPE, $ACCESS_APPROVAL_REASON, $ACCESS_ATTEMPTED, $LOCAL_TRANSACTION_ID, $GLOBAL_TRANSACTION_ID, $INSTANCE_NAME, $HOSTNAME)
    print dataline
}

Cheatsheet

awk_cheatsheet.png

AWK.jpg