sharding-sphere之SQL解析insert

96
一滴水的坚持
2018.06.12 15:38 字数 680

以mysql为例,官网说明insert语句的写法如下:

INSERT [LOW_PRIORITY | DELAYED | HIGH_PRIORITY] [IGNORE]
    [INTO] tbl_name
    [PARTITION (partition_name [, partition_name] ...)]
    [(col_name [, col_name] ...)]
    {VALUES | VALUE} (value_list) [, (value_list)] ...
    [ON DUPLICATE KEY UPDATE assignment_list]

INSERT [LOW_PRIORITY | DELAYED | HIGH_PRIORITY] [IGNORE]
    [INTO] tbl_name
    [PARTITION (partition_name [, partition_name] ...)]
    SET assignment_list
    [ON DUPLICATE KEY UPDATE assignment_list]

INSERT [LOW_PRIORITY | HIGH_PRIORITY] [IGNORE]
    [INTO] tbl_name
    [PARTITION (partition_name [, partition_name] ...)]
    [(col_name [, col_name] ...)]
    SELECT ...
    [ON DUPLICATE KEY UPDATE assignment_list]

value:
    {expr | DEFAULT}

value_list:
    value [, value] ...

assignment:
    col_name = value

assignment_list:
    assignment [, assignment] ...

简单来讲,支持一下三种:

--第一种Insert into values
insert into table(column1,column2,column3...) values (data1,data2,data3...)
--第二种Insert into set
insert into table set column1=data1,column2=data2,column3=data3...
--第三种
insert into table select ...

目前来讲,sharding-sphere并不支持第三种。原因在这里:

 //如果词法解析器解析表名后面为select关键字,则抛异常。
 if (lexerEngine.equalAny(DefaultKeyword.SELECT, Symbol.LEFT_PAREN)) {
     throw new UnsupportedOperationException("Cannot INSERT SELECT");
 }

接下来,就以第一种为例,介绍一下整个的语法解析过程:
解析sql:

INSERT INTO `table` (`field1`, `field2`) VALUES (10, 1)

语法解析器会将sql最终解析成SqlStatement,sql的解析都会被Sql语法解析引擎SQLParsingEngine去解析,从SQL解析引擎看起,看这条insert sql都经历了些什么故事?

@Test
public void insertValuesTest() {
    ShardingRule shardingRule = createShardingRuleByLocalHost();
    String insertSQL="INSERT INTO `table` (`field1`, `field2`) VALUES (10, 1)";
    //初始化一个语法解析器
    SQLParsingEngine statementParser = new SQLParsingEngine(DatabaseType.MySQL,insertSQL, shardingRule, null);
    //语法解析器解析
    InsertStatement insertStatement = (InsertStatement) statementParser.parse(false);
    System.out.println(insertStatement);
}

SQLParsingEngine解析过程中,首先判断是否需要读缓存,如果需要则从缓存中取,如果不需要,则初始化一个词法解析器LexerEngine,获取第一个分词,根据分词,数据库类型获取真实的SqlParser,这里为MySQLInsertParser,具体过程在链接:
词法解析器

public SQLStatement parse(final boolean useCache) {
    //是否读缓存
    Optional<SQLStatement> cachedSQLStatement = getSQLStatementFromCache(useCache);
    if (cachedSQLStatement.isPresent()) {
        return cachedSQLStatement.get();
    }
    //词法解析引擎初始化
    LexerEngine lexerEngine = LexerEngineFactory.newInstance(dbType, sql);
    //分词,获取第一个分词,此处为insert
    lexerEngine.nextToken(); //此处为insert分词
    //获取语法解析器,并解析
    SQLStatement result = SQLParserFactory.newInstance(dbType, lexerEngine.getCurrentToken().getType(), shardingRule, lexerEngine, shardingMetaData).parse();
    if (useCache) {
        ParsingResultCache.getInstance().put(sql, result);
    }
    return result;
}

MySQLInsertParser解析过程中,首先获取insert之后的下一个分词,然后交由下一个从句解析器去解析,比如:insert之后,会解析into表名,交由InsertIntoClauseParser去解析,然后解析(,)里的字段信息,交由InsertColumnsClauseParser去解析,然后判断是否是insert...select语句,如果是,则抛异常,表示不支持,否则,当做insert..values语句处理,交由InsertValuesClauseParser处理,若不是insert..values语句,则交由InsertSetClauseParser,当做insert...set语句处理。

@Override
      public final DMLStatement parse() {
        //获取下一分词
        lexerEngine.nextToken();
        //初始化返回的结果
        InsertStatement result = new InsertStatement();
        //InsertIntoClauseParser insertInto从句解析器解析结果
        insertClauseParserFacade.getInsertIntoClauseParser().parse(result);
        //解析()及里面的字段信息
        insertClauseParserFacade.getInsertColumnsClauseParser().parse(result, shardingMetaData);
        //如果是insert select 语句,则不支持
        if (lexerEngine.equalAny(DefaultKeyword.SELECT, Symbol.LEFT_PAREN)) {
            throw new UnsupportedOperationException("Cannot INSERT SELECT");
        }
        //InsertValuesClauseParser解析insert value信息
        insertClauseParserFacade.getInsertValuesClauseParser().parse(result, shardingMetaData);
        //InsertSetClauseParser 解析insert...set信息
        insertClauseParserFacade.getInsertSetClauseParser().parse(result);
        //ON DUPLICATE KEY UPDATE 解析
        insertClauseParserFacade.getInsertDuplicateKeyUpdateClauseParser().parse(result);
        //处理需要自动生成值的列
        processGeneratedKey(result);
        return result;
    }

InsertIntoClauseParser的处理流程:

public void parse(final InsertStatement insertStatement) {
    //如果出现不支持的词在into之前,则终止解析    
    lexerEngine.unsupportedIfEqual(getUnsupportedKeywordsBeforeInto());
    //跳过所有字符,直到出现into
    lexerEngine.skipUntil(DefaultKeyword.INTO);
    //获取into后的下一分词,当然这里就是表名了
    lexerEngine.nextToken();
    //解析表引用关系
    tableReferencesClauseParser.parse(insertStatement, true);
    skipBetweenTableAndValues(insertStatement);
}

而在解析表引用关系时,实质是使用MySQLTableReferencesClauseParser,mysql的table从句解析器解析,具体如下:

@Override
protected void parseTableReference(final SQLStatement sqlStatement, final boolean isSingleTableOnly) {
    //解析表
    parseTableFactor(sqlStatement, isSingleTableOnly);
    //PARTITION 解析
    parsePartition();
    parseIndexHint(sqlStatement);
}  

protected final void parseTableFactor(final SQLStatement sqlStatement, final boolean isSingleTableOnly) {
    final int beginPosition = lexerEngine.getCurrentToken().getEndPosition() - lexerEngine.getCurrentToken().getLiterals().length();
    String literals = lexerEngine.getCurrentToken().getLiterals();
    int skippedSchemaNameLength = 0;
    //获取下一分词
    lexerEngine.nextToken();
    //跳过如果是. 则在点之前的是schema的名称,跳过.
    if (lexerEngine.skipIfEqual(Symbol.DOT)) {
         //跳过schema的长度为skippedSchemaNameLength
        skippedSchemaNameLength = literals.length() + Symbol.DOT.getLiterals().length();
        //表名为当前分词
        literals = lexerEngine.getCurrentToken().getLiterals();
    }
    //获取表名 如果为`tablename`,则返回tablename
    String tableName = SQLUtil.getExactlyValue(literals);
    if (Strings.isNullOrEmpty(tableName)) {
        return;
    }
    //解析别名 
    Optional<String> alias = aliasExpressionParser.parseTableAlias();
    //如果是单表,或者能根据逻辑实体表名获取到表规则等其他条件
    if (isSingleTableOnly || shardingRule.tryFindTableRuleByLogicTable(tableName).isPresent() || shardingRule.findBindingTableRule(tableName).isPresent()
            || shardingRule.getShardingDataSourceNames().getDataSourceNames().contains(shardingRule.getShardingDataSourceNames().getDefaultDataSourceName())) {
        //表的信息记录在返回值里,有数据库的schema名称,表名称,开始位置坐标
        sqlStatement.getSqlTokens().add(new TableToken(beginPosition, skippedSchemaNameLength, literals));
        //表名和别名记录下来
        sqlStatement.getTables().add(new Table(tableName, alias));
    }
    //解析是否强制索引,insert语句直接跳过,不会使用
    parseForceIndex(tableName, sqlStatement);
    //表关联解析,直接跳过
    parseJoinTable(sqlStatement);
    //如果参数是单表,且解析结果不是单表,则直接抛异常
    if (isSingleTableOnly && !sqlStatement.getTables().isSingleTable()) {
        throw new UnsupportedOperationException("Cannot support Multiple-Table.");
    }
}

insert into table具体如何解析结束看完了,接下来,到解析表字段了,InsertColumnsClauseParser出场了。在InsertColumnsClauseParser处理逻辑中,从(开始,解析一个又一个的分词,直到碰到)或者结束分词。如果没有(,则获取全部字段。

public void parse(final InsertStatement insertStatement, final ShardingMetaData shardingMetaData) {
        Collection<Column> result = new LinkedList<>();
        //获取表名
        String tableName = insertStatement.getTables().getSingleTableName();
        //获取需要自动生成字段的值
        Optional<Column> generateKeyColumn = shardingRule.getGenerateKeyColumn(tableName);
        int count = 0;
        //如果是`(`
        if (lexerEngine.equalAny(Symbol.LEFT_PAREN)) {
            do {
                //获取下一个分词,就是获取insert into table(field1,filed2...)中`(`后面的fileld1,或者是`,`后面的field2.
                lexerEngine.nextToken();
                //具体解析里面的字段,返回值name为具体属性
                SQLExpression sqlExpression = basicExpressionParser.parse(insertStatement);
                //根据不同类型,获取列属性名称
                String columnName = null;
                if (sqlExpression instanceof SQLPropertyExpression) {
                    columnName = SQLUtil.getExactlyValue(((SQLPropertyExpression) sqlExpression).getName());
                }
                if (sqlExpression instanceof SQLIdentifierExpression) {
                    columnName = SQLUtil.getExactlyValue(((SQLIdentifierExpression) sqlExpression).getName());
                }
                if (sqlExpression instanceof SQLIgnoreExpression) {
                    columnName = SQLUtil.getExactlyValue(((SQLIgnoreExpression) sqlExpression).getExpression());
                }
                //返回值添加列信息
                result.add(new Column(columnName, tableName));
                if (generateKeyColumn.isPresent() && generateKeyColumn.get().getName().equalsIgnoreCase(columnName)) {
                    //如果有需要自动生成列信息的字段,记录字段位置
                    insertStatement.setGenerateKeyColumnIndex(count);
                }
                count++;
            } while (!lexerEngine.equalAny(Symbol.RIGHT_PAREN) && !lexerEngine.equalAny(Assist.END));
            insertStatement.setColumnsListLastPosition(lexerEngine.getCurrentToken().getEndPosition() - lexerEngine.getCurrentToken().getLiterals().length());
            lexerEngine.nextToken();
        } else {
          //忽略若干代码
        }
        insertStatement.getColumns().addAll(result);
}

接下来要解析values后面的值了,惯例是InsertValuesClauseParser,该从句首先判断是否是values分词的语句,如果是,则解析,如果不是则直接跳过,等待后面insert ...set处理。

public void parse(final InsertStatement insertStatement, final ShardingMetaData shardingMetaData) {
    Collection<Keyword> valueKeywords = new LinkedList<>();
    valueKeywords.add(DefaultKeyword.VALUES);
    valueKeywords.addAll(Arrays.asList(getSynonymousKeywordsForValues()));
    //是否是value,或者是values的关键字,如果是则解析,如果不是,则跳过
    if (lexerEngine.skipIfEqual(valueKeywords.toArray(new Keyword[valueKeywords.size()]))) {
        //解析value值
        parseValues(insertStatement);
    }
}

private void parseValues(final InsertStatement insertStatement) {
    int beginPosition = lexerEngine.getCurrentToken().getEndPosition() - lexerEngine.getCurrentToken().getLiterals().length();
    int endPosition;
    insertStatement.getSqlTokens().add(new InsertValuesToken(beginPosition, insertStatement.getTables().getSingleTableName()));
    do {
        beginPosition = lexerEngine.getCurrentToken().getEndPosition() - lexerEngine.getCurrentToken().getLiterals().length();
        //分词器解析必须是`(`开头
        lexerEngine.accept(Symbol.LEFT_PAREN);
        List<SQLExpression> sqlExpressions = new LinkedList<>();
        int columnsCount = 0;
        do {
            //解析每一个分词sql后面的具体值,对应sql中的INSERT INTO `table` (`field1`, `field2`) VALUES (10, 1)的10和1
            sqlExpressions.add(basicExpressionParser.parse(insertStatement));
            //跳过::
            skipsDoubleColon();
            //列个数+1
            columnsCount++;
            //当解析器遇到的分词为,号时,继续解析
        } while (lexerEngine.skipIfEqual(Symbol.COMMA));
        //删除自动生成的键
        removeGenerateKeyColumn(insertStatement, columnsCount);
        columnsCount = 0;
        int parametersCount = 0;
        AndCondition andCondition = new AndCondition();
        for (Column each : insertStatement.getColumns()) {
            SQLExpression sqlExpression = sqlExpressions.get(columnsCount);
            //如果是分片项,则添加Condition
            if (shardingRule.isShardingColumn(each)) {
                andCondition.getConditions().add(new Condition(each, sqlExpression));
            }
            //如果是自动生成的列
            if (insertStatement.getGenerateKeyColumnIndex() == columnsCount) {
                //生成Condition
                insertStatement.getGeneratedKeyConditions().add(createGeneratedKeyCondition(each, sqlExpression));
            }
            //列个数+1
            columnsCount++;
            if (sqlExpression instanceof SQLPlaceholderExpression) {
                parametersCount++;
            }
        }
        endPosition = lexerEngine.getCurrentToken().getEndPosition();
        //分词解析器必须以)结束
        lexerEngine.accept(Symbol.RIGHT_PAREN);
        //组装数据
        insertStatement.getInsertValues().getInsertValues().add(new InsertValue(DefaultKeyword.VALUES, lexerEngine.getInput().substring(beginPosition, endPosition), parametersCount));
        insertStatement.getConditions().getOrCondition().getAndConditions().add(andCondition);
    } while (lexerEngine.skipIfEqual(Symbol.COMMA));
    insertStatement.setInsertValuesListLastPosition(endPosition);
}

如果是insert...set语句,则会走到InsertSetClauseParser,去解析set语句,如果发现不是set开头的语句,直接return。

public void parse(final InsertStatement insertStatement) {
    //判断是否是set,如不是,直接返回
    if (!lexerEngine.skipIfEqual(getCustomizedInsertKeywords())) {
        return;
    }
    //删除无用的Token
    removeUnnecessaryToken(insertStatement);
    int beginPosition = lexerEngine.getCurrentToken().getEndPosition() - lexerEngine.getCurrentToken().getLiterals().length();
    insertStatement.getSqlTokens().add(new InsertValuesToken(beginPosition, insertStatement.getTables().getSingleTableName()));
    int parametersCount = 0;
    do {
        //解析set之后的数据库列名称
        SQLExpression sqlExpression = basicExpressionParser.parse(insertStatement);
        Column column = null;
        if (sqlExpression instanceof SQLPropertyExpression) {
            column = new Column(SQLUtil.getExactlyValue(((SQLPropertyExpression) sqlExpression).getName()), insertStatement.getTables().getSingleTableName());
        }
        if (sqlExpression instanceof SQLIdentifierExpression) {
            column = new Column(SQLUtil.getExactlyValue(((SQLIdentifierExpression) sqlExpression).getName()), insertStatement.getTables().getSingleTableName());
        }
        if (sqlExpression instanceof SQLIgnoreExpression) {
            column = new Column(SQLUtil.getExactlyValue(((SQLIgnoreExpression) sqlExpression).getExpression()), insertStatement.getTables().getSingleTableName());
        }
        //列属性后面必须紧跟=号
        lexerEngine.accept(Symbol.EQ);
        //获取=号下一分词类型,根据类型组装
        if (lexerEngine.equalAny(Literals.INT)) {
            sqlExpression = new SQLNumberExpression(Integer.parseInt(lexerEngine.getCurrentToken().getLiterals()));
        } else if (lexerEngine.equalAny(Literals.FLOAT)) {
            sqlExpression = new SQLNumberExpression(Double.parseDouble(lexerEngine.getCurrentToken().getLiterals()));
        } else if (lexerEngine.equalAny(Literals.CHARS)) {
            sqlExpression = new SQLTextExpression(lexerEngine.getCurrentToken().getLiterals());
        } else if (lexerEngine.equalAny(DefaultKeyword.NULL)) {
            sqlExpression = new SQLIgnoreExpression(DefaultKeyword.NULL.name());
        } else if (lexerEngine.equalAny(Symbol.QUESTION)) {
            sqlExpression = new SQLPlaceholderExpression(insertStatement.getParametersIndex());
            insertStatement.increaseParametersIndex();
            parametersCount++;
        } else {
            throw new UnsupportedOperationException("");
        }
        //获取下一分词
        lexerEngine.nextToken();
        if (lexerEngine.equalAny(Symbol.COMMA, DefaultKeyword.ON, Assist.END)) {
            //组装Condition
            insertStatement.getConditions().add(new Condition(column, sqlExpression), shardingRule);
        } else {
            //跳过,直到出现,on
            lexerEngine.skipUntil(Symbol.COMMA, DefaultKeyword.ON);
        }
    //如果是,结尾,继续循环,解析其他的字段
    } while (lexerEngine.skipIfEqual(Symbol.COMMA));
    //组装返回数据
    int endPosition = lexerEngine.getCurrentToken().getEndPosition() - lexerEngine.getCurrentToken().getLiterals().length();
    insertStatement.getInsertValues().getInsertValues().add(new InsertValue(DefaultKeyword.VALUES, lexerEngine.getInput().substring(beginPosition, endPosition), parametersCount));
    insertStatement.setInsertValuesListLastPosition(endPosition);
}

insert语句解析过程结束,解析过程复杂,能用如此简洁的代码写完,代码功底深厚令人折服。大写的服。但其实还是有一些能优化的点。
最后,再看一下InsertStatement的信息,解析之后,该对象组装了些啥。

InsertStatement.png

对应sql

INSERT INTO `table` (`field1`, `field2`) VALUES (10, 1)
public final class InsertStatement extends DMLStatement {
    //列属性信息
    private final Collection<Column> columns = new LinkedList<>();
    //自动生成的表字段信息
    private List<GeneratedKeyCondition> generatedKeyConditions = new LinkedList<>();
    //插入的值
    private final InsertValues insertValues = new InsertValues();
    //数据库字段属性最后结束的位置
    private int columnsListLastPosition;
    //自动生成键的位置
    private int generateKeyColumnIndex = -1;
    //insert values最后结束的位置
    private int insertValuesListLastPosition;
}

public abstract class AbstractSQLStatement implements SQLStatement {
    //sql类型
    private final SQLType type;
    //表信息
    private final Tables tables = new Tables();
    //分片信息
    private final Conditions conditions = new Conditions();
    //sql分词信息
    private final List<SQLToken> sqlTokens = new LinkedList<>();
    
    private int parametersIndex;

}
InsertStatement.png
sharding-sphere
Web note ad 1