int yyparse (class THD *YYTHD, class Parse_tree_root **parse_tree) { ...
/* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol.
   When no lookahead token is buffered (YYEMPTY), fetch one now.  */
if (yychar == YYEMPTY)
  {
    YYDPRINTF ((stderr, "Reading a token: "));
    /* Invoke the lexical analyzer to obtain the next token.  This must be
       a block comment (or sit on its own line): a trailing line comment
       followed by the closing brace on the same physical line would
       swallow the brace.  */
    yychar = YYLEX;
  }
{
  ...
  /*
    MY_LEX_IDENT: scan an identifier or keyword.

    Outcomes visible in this excerpt:
      - the token value returned by find_keyword() when it is non-zero;
      - UNDERSCORE_CHARSET when the token starts with '_' and the rest of
        it names a character set known to get_charset_by_csname();
      - otherwise result_state, which is IDENT or IDENT_QUOTED;
      - or, on an invalid leading multi-byte sequence, switch to
        MY_LEX_CHAR and `continue` (presumably re-entering the enclosing
        state loop, which is outside this excerpt).
  */
  case MY_LEX_IDENT:
    const char *start;
    if (use_mb(cs)) {
      /* Multi-byte character set: always report IDENT_QUOTED. */
      result_state = IDENT_QUOTED;

      /* Validate / consume the first character, which was already read. */
      switch (my_mbcharlen(cs, lip->yyGetLast())) {
        case 1:
          break;
        case 0:
          if (my_mbmaxlenlen(cs) < 2) break;
          /* else fall through */
        default:
          int l = my_ismbchar(cs, lip->get_ptr() - 1, lip->get_end_of_query());
          if (l == 0) {
            /* Not a valid multi-byte character: handle it as a plain char. */
            state = MY_LEX_CHAR;
            continue;
          }
          /* The first byte is already consumed; skip the remaining ones. */
          lip->skip_binary(l - 1);
      }

      /* Consume the rest of the identifier characters. */
      while (ident_map[c = lip->yyGet()]) {
        switch (my_mbcharlen(cs, c)) {
          case 1:
            break;
          case 0:
            if (my_mbmaxlenlen(cs) < 2) break;
            /* else fall through */
          default:
            int l;
            /*
              NOTE(review): this `break` leaves only the switch, not the
              enclosing while loop, so scanning continues with the next
              byte.
            */
            if ((l = my_ismbchar(cs, lip->get_ptr() - 1,
                                 lip->get_end_of_query())) == 0)
              break;
            lip->skip_binary(l - 1);
        }
      }
    } else {
      /* Single-byte path: OR every consumed character into result_state. */
      for (result_state = c; ident_map[c = lip->yyGet()]; result_state |= c)
        ;
      /* If there were non-ASCII characters, mark that we must convert */
      result_state = result_state & 0x80 ? IDENT_QUOTED : IDENT;
    }

    /* Record the token length and the position right after the token. */
    length = lip->yyLength();
    start = lip->get_ptr();
    if (lip->ignore_space) {
      /*
        If we find a space then this can't be an identifier. We notice this
        below by checking start != lex->ptr.
      */
      for (; state_map[c] == MY_LEX_SKIP; c = lip->yyGet()) {
        if (c == '\n') lip->yylineno++;
      }
    }

    /*
      "ident." directly followed by another identifier character (and with
      nothing skipped in between): the next state handles the separator.
    */
    if (start == lip->get_ptr() && c == '.' && ident_map[lip->yyPeek()])
      lip->next_state = MY_LEX_IDENT_SEP;
    else {
      /* '(' must follow directly if function */
      lip->yyUnget();
      if ((tokval = find_keyword(lip, length, c == '('))) {
        lip->next_state = MY_LEX_START; /* Allow signed numbers */
        return (tokval);                /* Was keyword */
      }
      lip->yySkip(); /* next state does a unget */
    }
    yylval->lex_str = get_token(lip, 0, length);

    /*
      Note: "SELECT _bla AS 'alias'"
      _bla should be considered as a IDENT if charset haven't been found.
      So we don't use MYF(MY_WME) with get_charset_by_csname to avoid
      producing an error.
    */
    if (yylval->lex_str.str[0] == '_') {
      auto charset_name = yylval->lex_str.str + 1;
      /*
        This block-local `cs` shadows the outer `cs` used earlier in this
        case; the outer one is used again after this block.
      */
      const CHARSET_INFO *cs =
          get_charset_by_csname(charset_name, MY_CS_PRIMARY, MYF(0));
      if (cs) {
        lip->warn_on_deprecated_charset(cs, charset_name);
        if (cs == &my_charset_utf8mb4_0900_ai_ci) {
          /*
            If cs is utf8mb4, and the collation of cs is the default
            collation of utf8mb4, then update cs with a value of the
            default_collation_for_utf8mb4 system variable:
          */
          cs = thd->variables.default_collation_for_utf8mb4;
        }
        yylval->charset = cs;
        lip->m_underscore_cs = cs;

        lip->body_utf8_append(lip->m_cpp_text_start,
                              lip->get_cpp_tok_start() + length);
        return (UNDERSCORE_CHARSET);
      }
    }

    /* Plain identifier: append it to the UTF-8 body and return its kind. */
    lip->body_utf8_append(lip->m_cpp_text_start);

    lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
                                  lip->m_cpp_text_end);

    return (result_state); /* IDENT or IDENT_QUOTED */
  ...
}