nfs-ganesha - Data Structure

常用缩写

  • FSAL - File System Abstract Layer
  • DRC - Duplicate Request/Reply Cache
  • CMAL - Cluster Management Abstraction Layer
  • XDR - eXternal Data Representation
  • SAL - State Abstraction Layer
  • cih - Cache inode hashed dictionary
  • TI-RPC - Transport-Independent RPC
  • TS-RPC - Transport-Specific RPC

1. FSAL module

每个FSAL module对应一个so文件,例如libfsalvfs.so。每个FSAL module描述了对某种文件系统的实现。有时也简称为fsal或module,对应struct fsal_module,变量名常用fsal或者fsal_hdl。

常见的FSAL module

  • PSEUDO, 用于根目录,或其他pseudo fs
  • MDCACHE(比较特殊)
  • VFS
  • CEPH
struct fsal_module {
    struct glist_head fsals;    //所有fsal通过它连成链表,存在`fsal_list`全局变量中
    struct glist_head exports;//这个fsal下所有export形成的链表头
    struct glist_head handles;//这个fsal下所有handle形成的链表头
    struct glist_head servers;//Data Servers链表头,pnfs使用用
    char *path; //so模块的path
    char *name; //module的名字
    void *dl_handle;//当用dlopen动态加载时候,返回的句柄
    struct fsal_ops m_ops;  这个模块提供的通用函数,如unload,create_export等等
    pthread_rwlock_t lock;//在使用上面的链表时候,用到的lock
    int32_t refcount;//Reference count 
    struct fsal_stats *stats;   /*< for storing the FSAL specific stats */
    struct fsal_staticfsinfo_t fs_info; /*< for storing FSAL static info */
};

相关函数:

  • load_fsal: 指定文件系统的名字,如VFS,加载相应模块,例如libfsalvfs.so,返回module。
  • unload_fsal
  • lookup_fsal: 指定文件系统的名字,从内存里找到module。
  • register_fsal:将module注册进系统,加入到fsal_list全局变量中。

2. FSAL Export

在配置文件中每描述一个export,ganesha就创建一个export,对应一个文件系统,每个export都有一个export id,都对应一个FSAL module。ganesha会自动创建一个根目录的export,其export id是0。 可以理解export是FSAL module的一个实例。

# Sample export definition: one EXPORT block describes one export.
EXPORT
{
    # Id of this export (id 0 is taken by the root export ganesha creates automatically).
    Export_ID=1;
    Path = /tmp;
    # PseudoFS path of this export.
    Pseudo = /vfs;
    Access_Type = RW;
    Protocols = 4;
    Transports = TCP;
    # The FSAL module this export corresponds to.
    FSAL
    {
        Name = VFS;
    }
}

2.1 gsh_export

struct gsh_export是在解析配置文件中用到的数据,不是太重要。

/* Export data used while parsing the configuration file (abridged excerpt). */
struct gsh_export {
    struct glist_head exp_list;      /* list node; the list head is the global `exportlist` */
    struct avltree_node node_k;      /* node used to store this export in an AVL tree keyed by export_id */
    char *fullpath;
    char *pseudopath;                /* PseudoFS path */
    uint16_t export_id;
    struct fsal_export *fsal_export; /* points to the FSAL export */
    /* ... remaining fields omitted ... */
};

相关函数:

  • alloc_export
  • free_export
  • insert_gsh_export
  • get_gsh_export
  • get_gsh_export_by_path
  • mount_gsh_export
  • remove_gsh_export
  • foreach_gsh_export

2.2 fsal_export

struct fsal_export代表着export。

/* Represents one export; exports can be stacked via sub_export/super_export. */
struct fsal_export {
    struct glist_head exports;// list node; fsal->exports is the head of the list of all exports of the same FSAL
    struct fsal_module *fsal;   // points to the fsal
    const struct fsal_up_vector *up_ops;    //Upcall operations
    struct export_ops exp_ops;// operations that do not need an inode, e.g. lookup_path, set_quota
    struct fsal_export *sub_export; // the export below this one
    struct fsal_export *super_export;// the export above this one
    uint16_t export_id; //export id
};

有意思的是export可以分层,形成一个stack的结构。其实一般分为两层,最上层是MDCACHE对应的export,下层是真正文件系统对应(如VFS)的export。这样的好处是,IO先进入到MDCACHE的export,如果能处理则直接返回,如果不能则调用下层export去处理。

相关函数:

  • fsal_attach_export 将export插入到这个FSAL对应链表中
  • fsal_export_stack 将两个export黏在一起,形成上下层关系

2.3 扩展的FSAL export

对于不同FSAL来说,都会扩展export的含义。如FSAL VFS的export定义为:

/* The VFS FSAL's export: a struct fsal_export plus VFS-specific fields. */
struct vfs_fsal_export {
    struct fsal_export export; // the fsal_export this structure extends
    struct fsal_filesystem *root_fs;// filesystem of the root directory
    struct glist_head filesystems;// head of the list of all filesystems under this export
    int fsid_type;
    bool async_hsm_restore;
};

再如FSAL MDCACHE的export定义为:

/* The MDCACHE FSAL's export: a struct fsal_export plus MDCACHE-specific fields. */
struct mdcache_fsal_export {
    struct fsal_export mfe_exp; // the fsal_export this structure extends
    char *name;
    struct fsal_up_vector up_ops;
    struct fsal_up_vector super_up_ops;
    struct glist_head entry_list;
    pthread_rwlock_t mdc_exp_lock;
    uint8_t flags;
};

不同FSAL的export的创建是由不同函数实现的

fsal->m_ops.create_export(...)

例如VFS的create_export函数指针指向vfs_create_export()

3. Object handle

对应struct fsal_obj_handle,变量名常用obj。每个object handle对应一个文件或者目录。

/* Object handle: each one corresponds to one file or directory. */
struct fsal_obj_handle {
    struct glist_head handles;// list node linking all handles of the same fsal; the list head is fsal->handles
    struct fsal_filesystem *fs;// the filesystem this object belongs to
    struct fsal_module *fsal;   // points to the fsal module
    struct fsal_obj_ops *obj_ops;   // e.g. lookup, readdir, getattrs, read2, write2 and so on
    pthread_rwlock_t obj_lock;
    object_file_type_t type;    /*< Object file type */
    fsal_fsid_t fsid;   
    uint64_t fileid;    // id that is unique within the same fsid, e.g. the inode number
    struct state_hdl *state_hdl;    // state associated with the object, see vfs_state_locate()
};

3.1 扩充的Object handle

对于不同FSAL来说,需要扩充这个对象。例如VFS对应的Object handle:

/* The VFS FSAL's object handle: a struct fsal_obj_handle plus VFS-specific fields. */
struct vfs_fsal_obj_handle {
    struct fsal_obj_handle obj_handle; // the fsal_obj_handle this structure extends
    fsal_dev_t dev;
    vfs_file_handle_t *handle;
    struct vfs_subfsal_obj_ops *sub_ops;    /*< Optional subfsal ops */
    const struct fsal_up_vector *up_ops;    /*< Upcall operations */
    /* Variant data; NOTE(review): which member is active presumably depends on the object type -- confirm. */
    union {
        struct {
            struct fsal_share share;
            struct vfs_fd fd;
        } file;
        struct {
            unsigned char *link_content;
            int link_size;
        } symlink;
        struct {
            vfs_file_handle_t *dir;
            char *name;
        } unopenable;
    } u;
};

例如MDCACHE对应的Object handle:

/* The MDCACHE FSAL's object handle (abridged excerpt). */
struct mdcache_fsal_obj_handle {
    struct fsal_obj_handle obj_handle;  /* the fsal_obj_handle this structure extends */
    struct fsal_obj_handle *sub_handle; /* handle of the more concrete FSAL below mdcache, e.g. VFS */
    /* ... remaining fields omitted ... */
};

4. 不同level的ops函数指针

4.1 FSAL module的ops

struct fsal_ops,FSAL module级别的ops

/*
 * Module-level operation table (struct fsal_ops).
 * Every slot is bound to a function of the same name, except .unload,
 * which is bound to unload_fsal.
 */
struct fsal_ops def_fsal_ops = {
    .unload = unload_fsal,
    .init_config = init_config,
    .dump_config = dump_config,
    .create_export = create_export,
    .emergency_cleanup = emergency_cleanup,
    .getdeviceinfo = getdeviceinfo,
    .fs_da_addr_size = fs_da_addr_size,
    .fsal_pnfs_ds = fsal_pnfs_ds,
    .fsal_pnfs_ds_ops = fsal_pnfs_ds_ops,
    .fsal_extract_stats = fsal_extract_stats,
    .fsal_reset_stats = fsal_reset_stats,
};

4.2 export的ops

struct export_ops,文件系统级别的ops

/*
 * Bind the VFS implementations into an export-level operation table.
 *
 * Only the slots assigned here are written; every other member of *ops
 * keeps whatever value the caller supplied.
 */
void vfs_export_ops_init(struct export_ops *ops)
{
    /* release slot */
    ops->release = release;

    /* path / handle slots */
    ops->lookup_path = vfs_lookup_path;
    ops->create_handle = vfs_create_handle;
    ops->wire_to_host = wire_to_host;

    /* dynamic info and quota slots */
    ops->get_fs_dynamic_info = get_dynamic_info;
    ops->set_quota = set_quota;
    ops->get_quota = get_quota;

    /* state allocation slots */
    ops->free_state = vfs_free_state;
    ops->alloc_state = vfs_alloc_state;
}

4.3 obj的ops

struct fsal_obj_ops,文件对象层的ops。

void vfs_handle_ops_init(struct fsal_obj_ops *ops)
{
    fsal_default_obj_ops_init(ops);

    ops->release = release;
    ops->merge = vfs_merge;
    ops->lookup = lookup;
    ops->readdir = read_dirents;
    ops->mkdir = makedir;
    ops->mknode = makenode;
    ops->symlink = makesymlink;
    ops->readlink = readsymlink;
    ops->getattrs = vfs_getattr2;
    ops->link = linkfile;
    ops->rename = renamefile;
    ops->unlink = file_unlink;
    ops->close = vfs_close;
    ops->handle_to_wire = handle_to_wire;
    ops->handle_to_key = handle_to_key;
    ops->open2 = vfs_open2;
    ops->reopen2 = vfs_reopen2;
    ops->read2 = vfs_read2;
    ops->write2 = vfs_write2;
    ops->commit2 = vfs_commit2;
    ops->list_ext_attrs = vfs_list_ext_attrs;
    ops->getextattr_id_by_name = vfs_getextattr_id_by_name;
    ops->getextattr_value_by_name = vfs_getextattr_value_by_name;
    ops->getextattr_value_by_id = vfs_getextattr_value_by_id;
    ops->setextattr_value = vfs_setextattr_value;
    ops->setextattr_value_by_id = vfs_setextattr_value_by_id;
    ops->remove_extattr_by_id = vfs_remove_extattr_by_id;
    ops->remove_extattr_by_name = vfs_remove_extattr_by_name;
}

5. MDCACHE和其他FSAL的交互

MDCACHE的export位于其他FSAL的上层。

/*
 * Abridged excerpt of mdcache_read2(): the read is passed on to the read2
 * operation of the sub-handle (the handle of the FSAL below MDCACHE),
 * wrapped in subcall().
 */
mdcache_read2()
{
...
    subcall(
        entry->sub_handle->obj_ops->read2(entry->sub_handle, bypass,
                         mdc_read_cb, read_arg, arg)
           );
}

6. IO处理线程的op_ctx

每个IO处理线程有个线程变量op_ctx,指向nfs_rpc_process_request()下的req_ctx局部变量。

/* Thread-local pointer to the request operation context used by the current thread. */
__thread struct req_op_context *op_ctx;

会在函数nfs_rpc_process_request的开头设置op_ctx,并在结尾清理op_ctx
init_root_op_context初始化op_ctx
release_root_op_context恢复op_ctx
nfs4_mds_putfh()设置op_ctx->ctx_export和op_ctx->fsal_export
在所有的IO处理线程中,可以方便地从op_ctx得到export信息。
那么op_ctx都存了些什么东西呢?

/* Per-request operation context; the thread-local op_ctx points at one of these. */
struct req_op_context {
    struct user_cred *creds;    /*< resolved user creds from request */
    struct user_cred original_creds;    /*< Saved creds */
    struct group_data *caller_gdata;
    gid_t *caller_garray_copy;  /*< Copied garray from AUTH_SYS */
    gid_t *managed_garray_copy; /*< Copied garray from managed gids */
    int cred_flags;     /* Various cred flags */
    sockaddr_t *caller_addr;    //IP connection info
    const uint64_t *clientid;   //Client ID 
    uint32_t nfs_vers;
    uint32_t nfs_minorvers;
    uint32_t req_type;  /*< request_type NFS | 9P */
    struct gsh_client *client;  // client host info; hides whether the connection is TCP, UDP or 9P
    struct gsh_export *ctx_export;// points to the gsh_export
    struct fsal_export *fsal_export;// current export, usually the MDCACHE export
    struct export_perms *export_perms;  /*< Effective export perms */
    nsecs_elapsed_t start_time; //start time of this op/request 
    nsecs_elapsed_t queue_wait; //time in wait queue 
    void *fsal_private;     /*< private for FSAL use */
    struct fsal_module *fsal_module;    // fsal module, usually MDCACHE
    struct fsal_pnfs_ds *fsal_pnfs_ds;  // pNFS related
};

7. Compound中Operation的处理函数

/*
 * Table of NFSv4 operation descriptors, indexed by operation number
 * (abridged excerpt). Each entry carries the operation's name, its handler
 * (.funct), the routine that frees its result (.free_res), the size of its
 * result structure and the export permission flags for the operation.
 * Slots 0-2 all map to the illegal-operation handler.
 */
static const struct nfs4_op_desc optabv4[] = {
    [0] = { /* all out of bounds illegals go here to die */
        .name = "OP_ILLEGAL",
        .funct = nfs4_op_illegal,
        .free_res = nfs4_op_illegal_Free,
        .resp_size = sizeof(ILLEGAL4res),
        .exp_perm_flags = 0},
    [1] = {
        .name = "OP_ILLEGAL",
        .funct = nfs4_op_illegal,
        .free_res = nfs4_op_illegal_Free,
        .resp_size = sizeof(ILLEGAL4res),
        .exp_perm_flags = 0},
    [2] = {
        .name = "OP_ILLEGAL",
        .funct = nfs4_op_illegal,
        .free_res = nfs4_op_illegal_Free,
        .resp_size = sizeof(ILLEGAL4res),
        .exp_perm_flags = 0},
    [NFS4_OP_ACCESS] = {
        .name = "OP_ACCESS",
        .funct = nfs4_op_access,
        .free_res = nfs4_op_access_Free,
        .resp_size = sizeof(ACCESS4res),
        .exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
    [NFS4_OP_CLOSE] = {
        .name = "OP_CLOSE",
        .funct = nfs4_op_close,
        .free_res = nfs4_op_close_Free,
        .resp_size = sizeof(CLOSE4res),
        .exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
    [NFS4_OP_COMMIT] = {
        .name = "OP_COMMIT",
        .funct = nfs4_op_commit,
        .free_res = nfs4_op_commit_Free,
        .resp_size = sizeof(COMMIT4res),
        .exp_perm_flags = EXPORT_OPTION_MD_WRITE_ACCESS}
    /* ... remaining operations omitted ... */
};

推荐阅读更多精彩内容

  • 第一天 7月13日OCP笔记: Oracle Ocp11g准备资料: OracleFundmentals 书 管理...
    fjxCode阅读 1,325评论 0 3
  • 一、Python简介和环境搭建以及pip的安装 4课时实验课主要内容 【Python简介】: Python 是一个...
    _小老虎_阅读 4,062评论 0 10
  • 我的文章引来了好多喜欢我的妹妹,(为嘛我不是个男的,如果我是男的,我可以撩多少妹纸啊!)我在好几个妹妹身上发现了一...
    旷野里的树儿阅读 212评论 14 19
  • 她曾经说过喜欢蓝色的天空, 就在那么一小片的天空下。 土地上还种着没发芽的花, 那次她说, 除了留在你身旁, 我什...
    小幸运小开心阅读 84评论 0 3
  • 原來身體是會騙人的。 誰說身體不騙人呢。 因爲極端渇望,致使身體出現相應的一繫列生理反應,而在謊言被打破的一瞬間,...
    theBigVivi阅读 20评论 0 0