CBoard+ES 快速搭建自助交互式数据分析平台(BI)

背景

之前我们基于ES构建了内容中心的全站搜索,现在作品的制作情况也需要在前端BI层面进行交互分析,BI层的实时聚合部分我们采用ES来实现;为了不影响内容搜索业务的正常运行,我们选择新建一个集群来支撑上述功能,通过reindex操作实现跨集群数据同步;_reindex操作并不是es7.0才有的功能,通过这个操作可以快速实现索引的复制、重建、跨集群迁移

搭建流程

整个交互式数据分析平台由两部分构成:CBoard + ES。

图表操作界面

  • CBoard
    因本篇侧重ES的使用讲解,CBoard的细节可参看相关资料;CBoard是一款开源的自助分析工具,使用流程主要由4步组成:创建数据源(这里我们选用ES)> 创建数据集 > 创建图表 > 创建看板。因创建图表功能支持用户拖拉拽操作,所以这部分操作对用户来说非常简单;部署完该项目后基本不需要二次开发,启动后配置完数据源便可以使用;难点在于自助分析需要有一个强大的实时计算引擎支撑;经测试,ES可满足对现有数据任意维度的聚合分析,图表的加载性能经优化后可达到秒级响应。

  • ES
    a、数据同步;这里采用_reindex的方式每天增量从内容库迁移,配置如下:

# Daily incremental copy from the content-search cluster into the BI cluster.
# Reindex from a remote cluster does not support manual or automatic slicing,
# so no `slices` parameter is passed; `refresh` refreshes the destination when the request finishes.
POST _reindex?refresh
{
  "source": {
    # Source ES cluster to pull from; the host must be allowed by reindex.remote.whitelist on the destination cluster.
    "remote": {
      "host": "http://source_host:9200"
    },
    "index": ["scene_model","ls_model","print_model"],   # source indices to pull documents from
    "_source": ["code","cover","id", "product", "title", "create_time", "publish_time", "update_time", "total_pv" ,"total_uv" ,"total_form", "login_id", "user_reg_time"],   # only these _source fields are copied
    "size": 1000,    # scroll batch size
    "query": {       # incremental pull: documents whose publish_time is on or after the start of yesterday
      "range": {
        "publish_time": {
          "gte": "now-1d/d"
        }
      }
    }
  },
  "dest": {
    "index": "work_model",  # destination index name
    "version_type": "external"  # upsert-like: creates missing documents and updates those whose destination version is older than the source version
  },
  "script": {
    "lang": "painless",
    # The destination receives documents from several indices, so each _id is prefixed with a value
    # derived from ctx._index to avoid ID collisions; the same value is stored in the `index` field.
    # NOTE(review): the script strips a leading 'as2_' (4 characters) from ctx._index, but the index
    # names listed under "index" above do not start with 'as2_' - confirm the real source index names.
    "source": "ctx._id = ctx._index.substring('as2_'.length(), ctx._index.length()) + '_' + ctx._id  ;ctx._source.index = ctx._index.substring('as2_'.length(), ctx._index.length()) "
  }
}

b、mapping & setting

{
  "mappings": {
    "dynamic": "false",
    "dynamic_templates": [
      {
        "strings": {
          "match_mapping_type": "string",
          "mapping": {
            "doc_values": false,
            "norms": false,
            "type": "keyword"
          }
        }
      }
    ],
    "date_detection": false,
    "properties": {
      "biz_type": {
        "type": "keyword"
      },
      "check_status": {
        "type": "keyword",
        "doc_values": false
      },
      "code": {
        "type": "keyword",
        "doc_values": false
      },
      "cover": {
        "type": "keyword",
        "index": false,
        "doc_values": false
      },
      "create_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      },
      "create_user": {
        "type": "keyword"
      },
      "enterprise": {
        "type": "keyword"
      },
      "id": {
        "type": "keyword",
        "doc_values": false
      },
      "is_del": {
        "type": "keyword",
        "doc_values": false
      },
      "login_id": {
        "type": "keyword",
        "doc_values": false
      },
      "member_type": {
        "type": "keyword"
      },
      "product": {
        "type": "keyword",
        "doc_values": false
      },
      "publish_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      },
      "share_type": {
        "type": "keyword"
      },
      "template_code": {
        "type": "keyword",
        "doc_values": false
      },
      "template_title": {
        "type": "text",
        "index_options": "freqs",
        "analyzer": "eqs_analyzer",
        "search_analyzer": "ik_smart"
      },
      "title": {
        "type": "text",
        "index_options": "freqs",
        "analyzer": "eqs_analyzer",
        "search_analyzer": "ik_smart"
      },
      "total_form": {
        "type": "integer",
        "ignore_malformed": true
      },
      "total_pv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "total_spv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "total_uv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "update_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      },
      "user_name": {
        "type": "keyword",
        "doc_values": false
      },
      "user_phone": {
        "type": "keyword"
      },
      "user_type": {
        "type": "keyword"
      },
      "yesterday_pv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "yesterday_uv": {
        "type": "integer",
        "ignore_malformed": true
      }
    }
  },
  "settings": {
    "index": {
      "refresh_interval": "120s",
      "translog": {
        "flush_threshold_size": "1024mb",
        "sync_interval": "120s",
        "durability": "async"
      },
      "max_result_window": "20000",
      "store": {
        "type": "niofs"
      },
      "unassigned": {
        "node_left": {
          "delayed_timeout": "1d"
        }
      },
      "analysis": {
        "analyzer": {
          "eqs_highlight_analyzer": {
            "filter": [
              "unique"
            ],
            "type": "custom",
            "tokenizer": "letter"
          },
          "eqs_analyzer": {
            "filter": [
              "unique"
            ],
            "char_filter": [
              "html_strip"
            ],
            "type": "custom",
            "tokenizer": "ik_max_word"
          }
        }
      },
      "number_of_replicas": "0",
      "codec": "best_compression",
      "routing": {
        "allocation": {
          "total_shards_per_node": "10"
        }
      },
      "search": {
        "slowlog": {
          "level": "info",
          "threshold": {
            "fetch": {
              "info": "500ms"
            },
            "query": {
              "info": "1s"
            }
          }
        }
      },
      "number_of_shards": "8",
      "merge": {
        "scheduler": {
          "max_thread_count": "2"
        }
      }
    }
  }
}

推荐阅读更多精彩内容