MySQL MySQL py_innodb_page_info.py表空间分析 Mr.蓝桉 2026-01-04 介绍 由《MySQL技术内幕 InnoDB存储引擎》一书的作者姜承尧开发的 py_innodb_page_info.py 是一款功能强大的工具,旨在深入分析MySQL表空间中的页面类型及其详细信息。该工具采用Python编写,由三个文件组成:py_innodb_page_info.py、mylib.py 和 include.py。为了确保工具的正常运行,这三个文件需要放置在同一个目录下。
值得注意的是,该工具需要在Python 2的环境中运行。如果在Python 3环境下使用,可能会遇到兼容性问题,导致运行出错。此外,py_innodb_page_info.py 工具专门设计用于解析MySQL 5.7版本的.ibd文件。对于MySQL 8.0及更高版本的.ibd文件,该工具可能无法正确解析。
include.py 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 #include.py #encoding=utf-8 INNODB_PAGE_SIZE = 1024 * 16 # InnoDB Page 16K # Start of the data on the page FIL_PAGE_DATA = 38 FIL_PAGE_OFFSET = 4 # page offset inside space FIL_PAGE_TYPE = 24 # File page type # Types of an undo log segment */ TRX_UNDO_INSERT = 1 TRX_UNDO_UPDATE = 2 # On a page of any file segment, data may be put starting from this offset FSEG_PAGE_DATA = FIL_PAGE_DATA # The offset of the undo log page header on pages of the undo log TRX_UNDO_PAGE_HDR = FSEG_PAGE_DATA PAGE_LEVEL = 26 #level of the node in an index tree; the leaf level is the level 0 */ innodb_page_type = { '0000':u'Freshly Allocated Page', '0002':u'Undo Log Page', '0003':u'File Segment inode', '0004':u'Insert Buffer Free List', '0005':u'Insert Buffer Bitmap', '0006':u'System Page', '0007':u'Transaction system Page', '0008':u'File Space Header', '0009':u'Extend Description Page', '000a':u'Uncompressed BLOB Page', '000b':u'1st compressed BLOB Page', '000c':u'Subsequent compressed BLOB Page', '45bf':u'B-tree Node' } innodb_page_direction = { '0000': 'Unknown(0x0000)', '0001': 'Page Left', '0002': 'Page Right', '0003': 'Page Same Rec', '0004': 'Page Same Page', '0005': 'Page No Direction', 'ffff': 'Unkown2(0xffff)' }
mylib.py 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 #encoding=utf-8 import os import include from include import * VARIABLE_FIELD_COUNT = 1 NULL_FIELD_COUNT = 0 class myargv(object): def __init__(self, argv): self.argv = argv self.parms = {} self.tablespace = '' # 解析输入参数 def parse_cmdline(self): argv = self.argv if len(argv) == 1: print 'Usage: python py_innodb_page_info.py [OPTIONS] tablespace_file' print 'For more options, use python py_innodb_page_info.py -h' return 0 while argv: if argv[0][0] == '-': if argv[0][1] == 'h': self.parms[argv[0]] = '' argv = argv[1:] break if argv[0][1] == 'v': self.parms[argv[0]] = '' argv = argv[1:] else: self.parms[argv[0]] = argv[1] argv = argv[2:] else: self.tablespace = argv[0] argv = argv[1:] if self.parms.has_key('-h'): print 'Get InnoDB Page Info' print 'Usage: python py_innodb_page_info.py [OPTIONS] tablespace_file\n' print 'The following options may be given as the first argument:' print '-h help ' print '-o output put the result to file' print '-t number thread to anayle the tablespace file' print '-v verbose mode' return 0 return 1 def mach_read_from_n(page,start_offset,length): ret = page[start_offset:start_offset+length] return ret.encode('hex') # main方法 def get_innodb_page_type(myargv): # 读取 .ibd 文件,存入 f f=file(myargv.tablespace,'rb') # INNODB_PAGE_SIZE=16KB,f 一定是16KB的整数倍,一页大小是16KB # 使用 f 的字节总数除以 16KB,得到的 fsize 表示页数 fsize = os.path.getsize(f.name)/INNODB_PAGE_SIZE ret = {} # 枚举每一页 for i in range(fsize): # 读取16KB数据,存入page,是第i的数据 # page 是二进制文件 page = f.read(INNODB_PAGE_SIZE) # page_offset 是 page[4, 8) 字符所表示的数据 page_offset = mach_read_from_n(page,FIL_PAGE_OFFSET,4) # page_type 是 page[24, 26) 字符所表示的数据 page_type = mach_read_from_n(page,FIL_PAGE_TYPE,2) # 如果加了参数 -v,表示输出详细数据 if myargv.parms.has_key('-v'): # page_type 为 45bf,表示 B-tree Node,输出它的信息 if page_type == '45bf': # page_level 是 page[64, 66) 字符所表示的数据 page_level = mach_read_from_n(page,FIL_PAGE_DATA+PAGE_LEVEL,2) print "page offset %s, page type <%s>, page level <%s>"%(page_offset,innodb_page_type[page_type],page_level) # page_type 对应 include.py 中的 innodb_page_type,输出它的信息 else: print "page offset %s, page type <%s>"%(page_offset,innodb_page_type[page_type]) # 统计数据 if not ret.has_key(page_type): ret[page_type] = 1 else: ret[page_type] = ret[page_type] + 1 # 输出fsize,表示有多少页 print "Total number of page: %d:"%fsize # 输出统计数据 for type in ret: print "%s: %s"%(innodb_page_type[type],ret[type])
py_innodb_page_info.py 1 2 3 4 5 6 7 8 9 10 11 12 13 14 #! /usr/bin/env python #encoding=utf-8 import mylib from sys import argv from mylib import myargv # main函数 if __name__ == '__main__': myargv = myargv(argv) if myargv.parse_cmdline() == 0: pass else: # 执行main方法 mylib.get_innodb_page_type(myargv)
示例 先创建个example_table空表,看下example_table.ibd,可以看到总共分配了11page(Total number of page: 11),但是为啥空闲的只有6page(B-tree Node: 6),这里就不得不提到之前说到的预留空间了
1 2 3 4 5 6 7 8 9 10 11 12 13 14 CREATE TABLE example_table( id INT AUTO_INCREMENT, first_name VARCHAR(50), last_name VARCHAR(50), email VARCHAR(100), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, status ENUM('active', 'inactive') DEFAULT 'active', PRIMARY KEY (id), INDEX idx_last_name (last_name), FULLTEXT INDEX ftx_first_name (first_name), INDEX idx_status (status), INDEX idx_created_at (created_at), INDEX idx_first_name_last_name (first_name, last_name) );
1 python2 py_innodb_page_info.py -v demo1/example_table.ibd
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 page offset 00000000, page type <File Space Header> page offset 00000001, page type <Insert Buffer Bitmap> page offset 00000002, page type <File Segment inode> page offset 00000003, page type <B-tree Node>, page level <0000> //数据页,0001表示根层,0000表示叶子层 page offset 00000004, page type <B-tree Node>, page level <0000> page offset 00000005, page type <B-tree Node>, page level <0000> page offset 00000006, page type <B-tree Node>, page level <0000> page offset 00000007, page type <B-tree Node>, page level <0000> page offset 00000008, page type <B-tree Node>, page level <0000> page offset 00000000, page type <Freshly Allocated Page> page offset 00000000, page type <Freshly Allocated Page> Total number of page: 11: //总共分配的页数 Freshly Allocated Page: 2 //可用的数据页 Insert Buffer Bitmap: 1 //插入缓冲页 File Space Header: 1 //文件空间头 B-tree Node: 6 //数据页 File Segment inode: 1 //文件端inonde
然后再使用存储过程插入1W行数据,再分析example_table.ibd
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 DELIMITER $$ CREATE PROCEDURE InsertExampleData() BEGIN DECLARE i INT DEFAULT 1; DECLARE maxEmail INT DEFAULT 1000; DECLARE firstName VARCHAR(50); DECLARE lastName VARCHAR(50); DECLARE email VARCHAR(100); DECLARE status ENUM('active', 'inactive'); DECLARE timestamp DATETIME; WHILE i <= 10000 DO SET firstName = CONCAT('Name', LPAD(i, 5, '0')); SET lastName = CONCAT('Surname', LPAD(i, 5, '0')); SET email = CONCAT(firstName, '_', lastName, '_', i, '@example.com'); SET status = IF(i % 2 = 0, 'active', 'inactive'); SET timestamp = DATE_ADD(NOW(), INTERVAL FLOOR(RAND() * 365) DAY); INSERT INTO example_table (first_name, last_name, email, created_at, status) VALUES (firstName, lastName, email, timestamp, status); SET i = i + 1; END WHILE; END$$ DELIMITER ;
1 CALL InsertExampleData();
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 page offset 00000000, page type <File Space Header> page offset 00000001, page type <Insert Buffer Bitmap> page offset 00000002, page type <File Segment inode> page offset 00000003, page type <B-tree Node>, page level <0001> page offset 00000004, page type <B-tree Node>, page level <0001> page offset 00000005, page type <B-tree Node>, page level <0001> page offset 00000006, page type <B-tree Node>, page level <0001> page offset 00000007, page type <B-tree Node>, page level <0001> page offset 00000008, page type <B-tree Node>, page level <0001> page offset 00000009, page type <B-tree Node>, page level <0000> page offset 0000000a, page type <B-tree Node>, page level <0000> page offset 0000000b, page type <B-tree Node>, page level <0000> page offset 0000000c, page type <B-tree Node>, page level <0000> page offset 0000000d, page type <B-tree Node>, page level <0000> page offset 0000000e, page type <B-tree Node>, page level <0000> page offset 0000000f, page type <B-tree Node>, page level <0000> page offset 00000010, page type <B-tree Node>, page level <0000> .... page offset 00000000, page type <Freshly Allocated Page> page offset 00000000, page type <Freshly Allocated Page> page offset 00000000, page type <Freshly Allocated Page> page offset 00000000, page type <Freshly Allocated Page> page offset 00000000, page type <Freshly Allocated Page> Total number of page: 640: //总共分配的页数 Freshly Allocated Page: 493 //可用的数据页 Insert Buffer Bitmap: 1 //插入缓冲页 File Space Header: 1 //文件空间头 B-tree Node: 144 //数据页 File Segment inode: 1 //文件端inonde
这时可以看到总共分配640页(Total number of page)即10240kb,使用了144页(B-tree Node)即2304kb,可用的数据页493页(Freshly Allocated Page)即7888kb
同时去看information_schema.tables中example_table的信息,可以看到DATA_LENGTH+INDEX_LENGTH=1589248byte+1261568byte=2850816byte=2784kb,DATA_FREE=4194304byte=4096kb,
DATA_LENGTH+INDEX_LENGTH+DATA_FREE=6880kb 10240-6880=3360kb
Insert Buffer Bitmap+File Space Header+File Segment inode=48kb
那这里为什么6880kb( DATA_LENGTH+INDEX_LENGTH+DATA_FREE)要远远小于10240kb(Total number of page),这是除了之前提到的预留空间还有碎片率吗?
ok,我们带着这个问题去分析。
1 select * from information_schema.tables where table_schema='demo1' and table_name='example_table'\G
这里我们计算下空间碎片率Space Debris Rate=(70451204194304)×100≈59.53%
1 2 3 4 5 6 7 8 9 10 11 12 13 14 SELECT table_name, table_rows, ROUND(data_length / (1024 * 1024 * 1024), 2) AS data_length_gb, ROUND(index_length / (1024 * 1024 * 1024), 2) AS index_length_gb, data_free, CASE WHEN (data_length + index_length + data_free) > 0 THEN ROUND(data_free / (data_length + index_length + data_free) * 100, 2) ELSE 0 END AS space_debris_rate FROM information_schema.tables WHERE table_schema = 'demo1' AND table_name = 'example_table';
这里我们将example_table的数据全部删除然后analyze table example_table; ,再查看information_schema.tables的统计信息,然后执行optimize table demo1.example_table; ,再去分析。最后可以看到DATA_LENGTH+INDEX_LENGTH+DATA_FREE=16384+ 81920+0=96kb=6page=B-tree Node