「43」Redis AOF持久化

前序:

AOF开启的配置:

1
2
3
appendonly yes
#appendonly no
appendfsync everysec

但是redis根据这配置到底是如何实现 AOF的呢?

appendOnly起点:

通过config.c配置文件可以查到appendOnly对应的变量:

1
createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG, server.aof_enabled, 0, NULL, updateAppendonly)

关于aof_enable使用地方:

再看下aof_state使用地方:

主要方向:

  • aof在启动时候加载顺序
  • aof存文件如何被触发执行的
  • aof保存的机制/同步策略?
  • aof机制和rdb混用?
  • aof如何保持较高的性能?

server启动,aof开启,加载流程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
/* Replay the append log file. On success C_OK is returned. On non fatal
* error (the append only file is zero-length) C_ERR is returned. On
* fatal error an error message is logged and the program exists. */
int loadAppendOnlyFile(char *filename) {
struct client *fakeClient;
FILE *fp = fopen(filename,"r");
struct redis_stat sb;
int old_aof_state = server.aof_state;
long loops = 0;
off_t valid_up_to = 0; /* Offset of latest well-formed command loaded. */
off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */

if (fp == NULL) {
serverLog(LL_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
exit(1);
}

/* Handle a zero-length AOF file as a special case. An empty AOF file
* is a valid AOF because an empty server with AOF enabled will create
* a zero length file at startup, that will remain like that if no write
* operation is received. */
if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
server.aof_current_size = 0;
server.aof_fsync_offset = server.aof_current_size;
fclose(fp);
return C_ERR;
}

/* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
* to the same file we're about to read. */
server.aof_state = AOF_OFF;
// 创建aof客户端
fakeClient = createAOFClient();
//加载文件
startLoadingFile(fp, filename, RDBFLAGS_AOF_PREAMBLE);

/* Check if this AOF file has an RDB preamble. In that case we need to
* load the RDB file and later continue loading the AOF tail. */
// 检查aof头部有没有rdb格式的内容,先加载rdb再加载aof
char sig[5]; /* "REDIS" */
if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) {
/* No RDB preamble, seek back at 0 offset. */
// 没有rdb格式的,
if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
} else {
/* RDB preamble. Pass loading the RDB functions. */
rio rdb;

serverLog(LL_NOTICE,"Reading RDB preamble from AOF file...");
if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
rioInitWithFile(&rdb,fp);
// 加载rdb格式的文件
if (rdbLoadRio(&rdb,RDBFLAGS_AOF_PREAMBLE,NULL) != C_OK) {
serverLog(LL_WARNING,"Error reading the RDB preamble of the AOF file, AOF loading aborted");
goto readerr;
} else {
serverLog(LL_NOTICE,"Reading the remaining AOF tail...");
}
}

/* Read the actual AOF file, in REPL format, command by command. */
// 按照aof协议来读取/解析aof文件
while(1) {
int argc, j;
unsigned long len;
robj **argv;
char buf[128];
sds argsds;
struct redisCommand *cmd;

/* Serve the clients from time to time */
if (!(loops++ % 1000)) {
loadingProgress(ftello(fp));
processEventsWhileBlocked();
processModuleLoadingProgressEvent(1);
}

if (fgets(buf,sizeof(buf),fp) == NULL) {
if (feof(fp))
break;
else
goto readerr;
}
if (buf[0] != '*') goto fmterr;
if (buf[1] == '\0') goto readerr;
argc = atoi(buf+1);
if (argc < 1) goto fmterr;

/* Load the next command in the AOF as our fake client
* argv. */
argv = zmalloc(sizeof(robj*)*argc);
fakeClient->argc = argc;
fakeClient->argv = argv;

for (j = 0; j < argc; j++) {
/* Parse the argument len. */
char *readres = fgets(buf,sizeof(buf),fp);
if (readres == NULL || buf[0] != '$') {
fakeClient->argc = j; /* Free up to j-1. */
freeFakeClientArgv(fakeClient);
if (readres == NULL)
goto readerr;
else
goto fmterr;
}
len = strtol(buf+1,NULL,10);

/* Read it into a string object. */
argsds = sdsnewlen(SDS_NOINIT,len);
if (len && fread(argsds,len,1,fp) == 0) {
sdsfree(argsds);
fakeClient->argc = j; /* Free up to j-1. */
freeFakeClientArgv(fakeClient);
goto readerr;
}
argv[j] = createObject(OBJ_STRING,argsds);

/* Discard CRLF. */
if (fread(buf,2,1,fp) == 0) {
fakeClient->argc = j+1; /* Free up to j. */
freeFakeClientArgv(fakeClient);
goto readerr;
}
}

/* Command lookup */
cmd = lookupCommand(argv[0]->ptr);
// 未知命令,启动直接退出了
if (!cmd) {
serverLog(LL_WARNING,
"Unknown command '%s' reading the append only file",
(char*)argv[0]->ptr);
exit(1);
}

if (cmd == server.multiCommand) valid_before_multi = valid_up_to;

/* Run the command in the context of a fake client */
fakeClient->cmd = fakeClient->lastcmd = cmd;
if (fakeClient->flags & CLIENT_MULTI &&
fakeClient->cmd->proc != execCommand)
{
queueMultiCommand(fakeClient);
} else {
cmd->proc(fakeClient);
}

/* The fake client should not have a reply */
serverAssert(fakeClient->bufpos == 0 &&
listLength(fakeClient->reply) == 0);

/* The fake client should never get blocked */
serverAssert((fakeClient->flags & CLIENT_BLOCKED) == 0);

/* Clean up. Command code may have changed argv/argc so we use the
* argv/argc of the client instead of the local variables. */
freeFakeClientArgv(fakeClient);
fakeClient->cmd = NULL;
if (server.aof_load_truncated) valid_up_to = ftello(fp);
if (server.key_load_delay)
debugDelay(server.key_load_delay);
}

/* This point can only be reached when EOF is reached without errors.
* If the client is in the middle of a MULTI/EXEC, handle it as it was
* a short read, even if technically the protocol is correct: we want
* to remove the unprocessed tail and continue. */
if (fakeClient->flags & CLIENT_MULTI) {
serverLog(LL_WARNING,
"Revert incomplete MULTI/EXEC transaction in AOF file");
valid_up_to = valid_before_multi;
goto uxeof;
}

loaded_ok: /* DB loaded, cleanup and return C_OK to the caller. */
fclose(fp);
freeFakeClient(fakeClient);
server.aof_state = old_aof_state;
stopLoading(1);
aofUpdateCurrentSize();
server.aof_rewrite_base_size = server.aof_current_size;
server.aof_fsync_offset = server.aof_current_size;
return C_OK;

readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */
if (!feof(fp)) {
if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
fclose(fp);
serverLog(LL_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
exit(1);
}

uxeof: /* Unexpected AOF end of file. */
if (server.aof_load_truncated) {
serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file !!!");
serverLog(LL_WARNING,"!!! Truncating the AOF at offset %llu !!!",
(unsigned long long) valid_up_to);
if (valid_up_to == -1 || truncate(filename,valid_up_to) == -1) {
if (valid_up_to == -1) {
serverLog(LL_WARNING,"Last valid command offset is invalid");
} else {
serverLog(LL_WARNING,"Error truncating the AOF file: %s",
strerror(errno));
}
} else {
/* Make sure the AOF file descriptor points to the end of the
* file after the truncate call. */
if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) {
serverLog(LL_WARNING,"Can't seek the end of the AOF file: %s",
strerror(errno));
} else {
serverLog(LL_WARNING,
"AOF loaded anyway because aof-load-truncated is enabled");
goto loaded_ok;
}
}
}
if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
fclose(fp);
serverLog(LL_WARNING,"Unexpected end of file reading the append only file. You can: 1) Make a backup of your AOF file, then use ./redis-check-aof --fix <filename>. 2) Alternatively you can set the 'aof-load-truncated' configuration option to yes and restart the server.");
exit(1);

fmterr: /* Format error. */
if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
fclose(fp);
serverLog(LL_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>");
exit(1);
}
流程图:

aof触发流程:

参照前面的方法,先找到appendfsync对应的变量,然后看使用场景。

1
createEnumConfig("appendfsync", NULL, MODIFIABLE_CONFIG, aof_fsync_enum, server.aof_fsync, AOF_FSYNC_EVERYSEC, NULL, NULL),
appendfsync取值
1
2
3
4
5
6
configEnum aof_fsync_enum[] = {
{"everysec", AOF_FSYNC_EVERYSEC}, 每秒
{"always", AOF_FSYNC_ALWAYS}, 每个操作
{"no", AOF_FSYNC_NO}, 由操作系统决定何时同步
{NULL, 0}
};
aof_fsync_enum哪里用到了?

流程图:

aof和RDB混用的情况

配置项:
1
2
3
4
5
aof-use-rdb-preamble  true/false

config.c配置:

createBoolConfig("aof-use-rdb-preamble", NULL, MODIFIABLE_CONFIG, server.aof_use_rdb_preamble, 1, NULL, NULL),
流程图:

aof文件如何“高效”持久化

如何持久化到硬盘?「flushAppendOnlyFile」
1
2
3
4
5
flushAppendOnlyFile函数

说明:

Write the append only file buffer on disk.
流程图:

AOF APPEND ONLY数据量过大

通过源码先看看流程:

Tips And Tricks

AOF数据量过大问题

Todo

End