Linux nfs_readpage同步读与nfs_pageio_opendesc
# Linux nfs_readpage同步读与nfs_pageio_opendesc

nfs_readpage是NFS客户端页缓存回填路径的入口函数，负责将单个page从NFS服务端同步读取到本地页缓存。该函数定义在fs/nfs/read.c中，是address_space_operations结构体中的readpage回调。当内核缺页处理或用户态发起pread/mmap访问未缓存页时，VMM层通过filemap_fault或generic_file_read_iter最终调用该函数。nfs_readpage的函数原型为：

```c
static int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_pageio_descriptor desc;
	int ret;

	trace_nfs_readpage_enter(inode, page);
	ret = nfs_read_add_folio(file, page_folio(page));
	trace_nfs_readpage_exit(inode, page, ret);
	return ret;
}
```

实际工作由nfs_read_add_folio完成，其核心逻辑如下：

```c
static int nfs_read_add_folio(struct file *file, struct folio *folio)
{
	struct inode *inode = folio->mapping->host;
	struct nfs_pageio_descriptor pgio;
	struct nfs_page *req;
	int ret;

	nfs_pageio_init_read(&pgio, inode, false, &nfs_async_read_completion_ops);
	req = nfs_page_create_from_folio(file, folio, 0, folio_size(folio));
	if (IS_ERR(req)) {
		nfs_pageio_complete(&pgio);
		return PTR_ERR(req);
	}
	ret = nfs_pageio_add_request(&pgio, req);
	/* 注：原文此处的比较运算符在提取时丢失，“==”为按上下文的推断 */
	if (ret == 0)
		nfs_pageio_complete(&pgio);
	nfs_pageio_cleanup(&pgio);
	return pgio.pg_error;
}
```

这里的关键数据结构是struct nfs_pageio_descriptor，它描述了一个NFS I/O请求的完整上下文。nfs_pageio_init_read完成初始化工作：

```c
void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
			  struct inode *inode, bool force_mds,
			  const struct nfs_pageio_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	const struct nfs_pageio_ops *pg_ops = server->nfs_client->rpc_ops->read_ops;

	nfs_pageio_init(pgio, inode, pg_ops, compl_ops,
			&nfs_rw_read_ops, server->rsize, 0);
	pgio->pg_ioflags = FLUSH_COND_STABLE;
	if (!force_mds && server->pnfs_curr_ld)
		pgio->pg_ops = &pnfs_read_ops;
}
```

nfs_pageio_init是底层初始化函数，设置I/O的size上限（由rsize决定）、读写类别，以及提交函数集nfs_rw_read_ops。该结构定义如下：

```c
static const struct nfs_rw_ops nfs_rw_read_ops = {
	.rw_alloc = nfs_read_alloc,
	.rw_free = nfs_rw_read_free,
	.rw_header_init = nfs_read_rw_header_init,
	.rw_done = nfs_read_done,
	.rw_result = nfs_read_result,
	.rw_alloc_private = nfs_read_alloc_private,
	.rw_commit_done = nfs_commit_done,
};
```

页请求通过nfs_page_create_from_folio创建，返回一个struct nfs_page。该结构描述了一个逻辑I/O单元，包含页面偏移量、长度、缓冲区指针等。然后调用nfs_pageio_add_request将请求加入descriptor的请求链表，当请求数量或大小累积达到阈值时，自动触发底层RPC提交。nfs_pageio_add_request内部调用pg_ops->pg_doio函数指针，对于直连MDS路径，该函数为nfs_pageio_doio，它调用nfs_generic_pg_read：

```c
int nfs_generic_pg_read(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	struct nfs_page *req;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops->rw_alloc);
	if (!hdr)
		return -ENOMEM;
	if (desc->pg_rw_ops->rw_header_init(hdr, desc->pg_ioflags)) {
		nfs_pgio_header_free(hdr);
		return -ENOMEM;
	}
	nfs_pageio_grouplock(desc);
	while ((req = nfs_pageio_cleanup_request(desc)) != NULL) {
		if (!nfs_pageio_add_request_mirror(desc, req))
			goto out_freereq;
	}
	if (list_empty(&hdr->pages))
		goto out_free;
	nfs_pageio_setup_mirroring(desc);
	nfs_pageio_error_cleanup(desc);
	ret = nfs_pageio_doio(desc, hdr);
	nfs_pgio_header_free(hdr);
	return ret;
out_freereq:
	nfs_release_request(req);
out_free:
	nfs_pgio_header_free(hdr);
	return -ENOMEM;
}
```

nfs_pageio_doio是核心提交函数，它调用nfs_generic_read或通过布局驱动完成I/O。对于普通读取流程，进入nfs_generic_read，该函数构建RPC调用描述符：

```c
static int nfs_generic_read(struct nfs_pageio_descriptor *desc,
			    struct nfs_pgio_header *hdr)
{
	struct nfs_readargs *args = hdr->args;
	struct nfs_readres *res = &hdr->res;
	struct rpc_message msg = {
		.rpc_proc = &nfs_procedures[NFSPROC4_CLNT_READ],
		.rpc_argp = args,
		.rpc_resp = res,
	};
	struct rpc_task_setup task_setup = {
		.rpc_client = hdr->server->client,
		.rpc_message = &msg,
		.callback_ops = &nfs_pgio_rpc_call_ops,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC,
	};

	args->fh = NFS_FH(hdr->inode);
	args->offset = hdr->args->offset;
	args->count = hdr->args->count;
	args->pages = hdr->args->pages;
	args->pgbase = hdr->args->pgbase;
	if (desc->pg_ioflags & FLUSH_COND_STABLE)
		args->stable = NFS_FILE_SYNC;
	nfs_fattr_init(hdr->res.fattr);
	return nfs_pageio_complete_read(desc, &task_setup, hdr);
}
```

在nfs_pageio_complete_read中，最终调用rpc_run_task创建并启动一个异步RPC任务。该任务完成后，nfs_read_done被调用以检查RPC状态，nfs_read_result将数据从接收缓冲区拷贝到页缓存中。

nfs_readpage的同步性质体现在：调用者进程在调用nfs_pageio_add_request以及之后等待I/O完成时被阻塞。因为nfs_readpage被设计为同步操作，当缺页处理程序调用它时，当前进程必须等待数据就绪才能继续执行。完成回调通过complete()机制或直接唤醒等待队列来解除阻塞。

总结来说，nfs_readpage通过nfs_pageio_descriptor实现了一个“请求收集-批量提交”的流水线模型。descriptor收集多个page请求，在达到rsize上限或显式调用nfs_pageio_complete时，一次性打包为单个RPC READ请求发送到服务端。这种设计减少了RPC交互次数，提高了顺序读的吞吐性能。