仓库源文站点原文


layout: post
comments: true
title: 修改V8和node.js建立中文后端开发工具
description: 修改V8编译器源码, 实现关键词的中文化. Modify source code of V8 JavaScript compiler to support keywords in Chinese.
date: 2018-09-19 00:00:00 -0700

categories: 语言设计 汉化关键词

本文github: https://github.com/swizl/zwnode

一. 修改V8

V8在一个文件里改就行了。

\deps\v8\src\parsing\scanner.cc

  1. 从 LLVM 借用现成的“轮子” StringSwitch,并做如下修改:把 StringRef 改成 std::wstring,去掉多余的功能,然后放在 scanner.cc 的开头。

参考:狗屎咖啡《为clang添加中文关键字》

// A chained, switch-like matcher over wide strings, adapted from LLVM's
// StringSwitch (StringRef replaced by std::wstring, extra matchers removed).
//
// Typical use is a single expression:
//
//   R r = StringSwitch<T, R>(text)
//             .Case(L"first", value_a)
//             .Case(L"second", value_b)
//             .Default(fallback);
//
// T is the type of the case values; R is the type the result is returned as.
//
// Lifetime note: only the ADDRESS of the matching case value is stored, so
// every value passed to Case() must stay alive until Default() or the
// conversion operator has been evaluated. Temporaries are fine as long as the
// whole chain is one full-expression, as in the example above.
template<typename T, typename R = T>
class StringSwitch {
  // The string being matched against the cases.
  std::wstring Str;
  // Address of the first matching case value; nullptr until a case matches.
  const T *Result;

public:
  // Takes the string by value and moves it into place, so callers passing a
  // temporary pay for no copy at all.
  explicit StringSwitch(std::wstring S)
  : Str(std::move(S)), Result(nullptr) { }

  // StringSwitch is not copyable.
  StringSwitch(const StringSwitch &) = delete;
  void operator=(const StringSwitch &) = delete;

  // Moving transfers the string buffer instead of copying it.
  StringSwitch(StringSwitch &&other) noexcept
  : Str(std::move(other.Str)), Result(other.Result) { }
  StringSwitch &operator=(StringSwitch &&other) noexcept {
    Str = std::move(other.Str);
    Result = other.Result;
    return *this;
  }

  ~StringSwitch() = default;

  // Case-sensitive matcher. S is a wide string literal of N characters
  // including the terminating NUL. The first case that matches wins; later
  // matching cases are ignored.
  template<unsigned N>
  StringSwitch& Case(const wchar_t (&S)[N], const T& Value) {
    assert(N);
    // Compare lengths first so that most non-matching cases are rejected
    // without looking at the characters. N == 1 is the empty literal, which
    // matches an empty Str without any character comparison.
    if (!Result && N-1 == Str.size() &&
        (N == 1 || Str.compare(S) == 0)) {
      Result = &Value;
    }
    return *this;
  }

  // Returns the matched case value, or Value when no case matched.
  R Default(const T &Value) const {
    if (Result)
      return *Result;
    return Value;
  }


  // Returns the matched case value; asserts that some case did match.
  operator R() const {
    assert(Result && "Fell off the end of a string-switch");
    return *Result;
  }
};
  2. 为每个英文关键字加上对应的中文关键字,具体用词可以根据自己的喜好修改。
    // X-macro table of every keyword the scanner recognizes.
    //   KEYWORD_GROUP(ch)       opens the group of ASCII keywords that start
    //                           with the character `ch`.
    //   KEYWORD(text, token)    maps an ASCII keyword to its token.
    //   ZWKEYWORD(text, token)  maps the Chinese alias written right after an
    //                           ASCII keyword to the same token.
    // A user defines the three macros before expanding KEYWORDS and may define
    // any of them as empty to skip that kind of entry.
    #define KEYWORDS(KEYWORD_GROUP, KEYWORD, ZWKEYWORD)                    \
    KEYWORD_GROUP('a')                                        \
    KEYWORD("arguments", Token::ARGUMENTS)                    \
    ZWKEYWORD("诸参", Token::ARGUMENTS)                    \
    KEYWORD("as", Token::AS)                                  \
    ZWKEYWORD("以", Token::AS)                                  \
    KEYWORD("async", Token::ASYNC)                            \
    ZWKEYWORD("异", Token::ASYNC)                            \
    KEYWORD("await", Token::AWAIT)                            \
    ZWKEYWORD("等", Token::AWAIT)                            \
    KEYWORD("anonymous", Token::ANONYMOUS)                    \
    ZWKEYWORD("匿", Token::ANONYMOUS)                    \
    KEYWORD_GROUP('b')                                        \
    KEYWORD("break", Token::BREAK)                            \
    ZWKEYWORD("破", Token::BREAK)                            \
    KEYWORD_GROUP('c')                                        \
    KEYWORD("case", Token::CASE)                              \
    ZWKEYWORD("例", Token::CASE)                              \
    KEYWORD("catch", Token::CATCH)                            \
    ZWKEYWORD("捕", Token::CATCH)                            \
    KEYWORD("class", Token::CLASS)                            \
    ZWKEYWORD("类", Token::CLASS)                            \
    KEYWORD("const", Token::CONST)                            \
    ZWKEYWORD("常", Token::CONST)                            \
    KEYWORD("constructor", Token::CONSTRUCTOR)                \
    ZWKEYWORD("构造器", Token::CONSTRUCTOR)                \
    KEYWORD("continue", Token::CONTINUE)                      \
    ZWKEYWORD("继", Token::CONTINUE)                      \
    KEYWORD_GROUP('d')                                        \
    KEYWORD("debugger", Token::DEBUGGER)                      \
    ZWKEYWORD("调试器", Token::DEBUGGER)                      \
    KEYWORD("default", Token::DEFAULT)                        \
    ZWKEYWORD("默", Token::DEFAULT)                        \
    KEYWORD("delete", Token::DELETE)                          \
    ZWKEYWORD("删", Token::DELETE)                          \
    KEYWORD("do", Token::DO)                                  \
    ZWKEYWORD("行", Token::DO)                                  \
    KEYWORD_GROUP('e')                                        \
    KEYWORD("else", Token::ELSE)                              \
    ZWKEYWORD("另", Token::ELSE)                              \
    KEYWORD("enum", Token::ENUM)                              \
    ZWKEYWORD("举", Token::ENUM)                              \
    KEYWORD("eval", Token::EVAL)                              \
    ZWKEYWORD("估", Token::EVAL)                              \
    KEYWORD("export", Token::EXPORT)                          \
    ZWKEYWORD("导", Token::EXPORT)                          \
    KEYWORD("extends", Token::EXTENDS)                        \
    ZWKEYWORD("承", Token::EXTENDS)                        \
    KEYWORD_GROUP('f')                                        \
    KEYWORD("false", Token::FALSE_LITERAL)                    \
    ZWKEYWORD("假", Token::FALSE_LITERAL)                    \
    KEYWORD("finally", Token::FINALLY)                        \
    ZWKEYWORD("末", Token::FINALLY)                        \
    KEYWORD("for", Token::FOR)                                \
    ZWKEYWORD("于", Token::FOR)                                \
    KEYWORD("from", Token::FROM)                              \
    ZWKEYWORD("从", Token::FROM)                              \
    KEYWORD("function", Token::FUNCTION)                      \
    ZWKEYWORD("函数", Token::FUNCTION)                      \
    KEYWORD_GROUP('g')                                        \
    KEYWORD("get", Token::GET)                                \
    ZWKEYWORD("取", Token::GET)                                \
    KEYWORD_GROUP('i')                                        \
    KEYWORD("if", Token::IF)                                  \
    ZWKEYWORD("如", Token::IF)                                  \
    KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
    ZWKEYWORD("成", Token::FUTURE_STRICT_RESERVED_WORD) \
    KEYWORD("import", Token::IMPORT)                          \
    ZWKEYWORD("引", Token::IMPORT)                          \
    KEYWORD("in", Token::IN)                                  \
    ZWKEYWORD("在", Token::IN)                                  \
    KEYWORD("instanceof", Token::INSTANCEOF)                  \
    ZWKEYWORD("是为", Token::INSTANCEOF)                  \
    KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD)  \
    ZWKEYWORD("接", Token::FUTURE_STRICT_RESERVED_WORD)  \
    KEYWORD_GROUP('l')                                        \
    KEYWORD("let", Token::LET)                                \
    ZWKEYWORD("让", Token::LET)                                \
    KEYWORD_GROUP('m')                                        \
    KEYWORD("meta", Token::META)                              \
    ZWKEYWORD("元", Token::META)                                \
    KEYWORD_GROUP('n')                                        \
    KEYWORD("name", Token::NAME)                              \
    ZWKEYWORD("名", Token::NAME)                              \
    KEYWORD("new", Token::NEW)                                \
    ZWKEYWORD("新", Token::NEW)                                \
    KEYWORD("null", Token::NULL_LITERAL)                      \
    ZWKEYWORD("无", Token::NULL_LITERAL)                      \
    KEYWORD_GROUP('o')                                        \
    KEYWORD("of", Token::OF)                                  \
    ZWKEYWORD("之", Token::OF)                                  \
    KEYWORD_GROUP('p')                                        \
    KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD)    \
    ZWKEYWORD("包", Token::FUTURE_STRICT_RESERVED_WORD)    \
    KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD)    \
    ZWKEYWORD("私", Token::FUTURE_STRICT_RESERVED_WORD)    \
    KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD)  \
    ZWKEYWORD("保", Token::FUTURE_STRICT_RESERVED_WORD)  \
    KEYWORD("prototype", Token::PROTOTYPE)                    \
    ZWKEYWORD("原型", Token::PROTOTYPE)                    \
    KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD)     \
    ZWKEYWORD("公", Token::FUTURE_STRICT_RESERVED_WORD)     \
    KEYWORD_GROUP('r')                                        \
    KEYWORD("return", Token::RETURN)                          \
    ZWKEYWORD("返", Token::RETURN)                          \
    KEYWORD_GROUP('s')                                        \
    KEYWORD("set", Token::SET)                                \
    ZWKEYWORD("设", Token::SET)                                \
    KEYWORD("static", Token::STATIC)                          \
    ZWKEYWORD("固", Token::STATIC)                          \
    KEYWORD("super", Token::SUPER)                            \
    ZWKEYWORD("超", Token::SUPER)                            \
    KEYWORD("switch", Token::SWITCH)                          \
    ZWKEYWORD("切", Token::SWITCH)                          \
    KEYWORD_GROUP('t')                                        \
    KEYWORD("target", Token::TARGET)                          \
    ZWKEYWORD("标", Token::TARGET)                          \
    KEYWORD("this", Token::THIS)                              \
    ZWKEYWORD("此", Token::THIS)                              \
    KEYWORD("throw", Token::THROW)                            \
    ZWKEYWORD("抛", Token::THROW)                            \
    KEYWORD("true", Token::TRUE_LITERAL)                      \
    ZWKEYWORD("真", Token::TRUE_LITERAL)                      \
    KEYWORD("try", Token::TRY)                                \
    ZWKEYWORD("试", Token::TRY)                                \
    KEYWORD("typeof", Token::TYPEOF)                          \
    ZWKEYWORD("之型", Token::TYPEOF)                          \
    KEYWORD_GROUP('u')                                        \
    KEYWORD("undefined", Token::UNDEFINED)                    \
    ZWKEYWORD("未定", Token::UNDEFINED)                    \
    KEYWORD_GROUP('v')                                        \
    KEYWORD("var", Token::VAR)                                \
    ZWKEYWORD("变", Token::VAR)                                \
    KEYWORD("void", Token::VOID)                              \
    ZWKEYWORD("空", Token::VOID)                              \
    KEYWORD_GROUP('w')                                        \
    KEYWORD("while", Token::WHILE)                            \
    ZWKEYWORD("当", Token::WHILE)                            \
    KEYWORD("with", Token::WITH)                              \
    ZWKEYWORD("为之", Token::WITH)                              \
    KEYWORD_GROUP('y')                                        \
    KEYWORD("yield", Token::YIELD)                            \
    ZWKEYWORD("降", Token::YIELD)                            \
    KEYWORD_GROUP('_')                                        \
    KEYWORD("__proto__", Token::PROTO_UNDERSCORED)            \
    ZWKEYWORD("__原__", Token::PROTO_UNDERSCORED)              \
    KEYWORD_GROUP('#')                                        \
    KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR)        \
    ZWKEYWORD("#构造器", Token::PRIVATE_CONSTRUCTOR)
    

3.仿照KeywordOrIdentifierToken写一个ZWKeywordOrIdentifierToken

// Classifies an identifier made of one-byte characters.
//
// Returns the token of the ASCII keyword that `input` spells, or
// Token::IDENTIFIER when it spells none. Only the KEYWORD entries of KEYWORDS
// are consulted here; the ZWKEYWORD (Chinese) entries expand to nothing.
//
// input         first character of the identifier
// input_length  number of characters in the identifier, at least 1
static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
                                             int input_length) {
  DCHECK_GE(input_length, 1);
  const int kMinLength = 2;
  const int kMaxLength = 12;
  // No keyword is shorter than kMinLength or longer than kMaxLength.
  if (input_length < kMinLength || input_length > kMaxLength) {
    return Token::IDENTIFIER;
  }
  switch (input[0]) {
    default:
// Chinese aliases are never one-byte strings, so skip them in this function.
#define ZWKEYWORD(keyword, token)
// Each group closes the previous case and opens the case for its own first
// character.
#define KEYWORD_GROUP_CASE(ch)                                \
      break;                                                  \
    case ch:
#define KEYWORD(keyword, token)                                           \
  {                                                                       \
    /* 'keyword' is a char array, so sizeof(keyword) is */                \
    /* strlen(keyword) plus 1 for the NUL char. */                        \
    const int keyword_length = sizeof(keyword) - 1;                       \
    STATIC_ASSERT(keyword_length >= kMinLength);                          \
    STATIC_ASSERT(keyword_length <= kMaxLength);                          \
    DCHECK_EQ(input[0], keyword[0]);                                      \
    DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD ||                 \
           0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
    /* input[0] is already known to match via the enclosing case label. */ \
    /* The chain must reach index kMaxLength - 1 (= 11); otherwise the */  \
    /* last character of a 12-character keyword such as "#constructor" */  \
    /* would never be compared. */                                        \
    if (input_length == keyword_length && input[1] == keyword[1] &&       \
        (keyword_length <= 2 || input[2] == keyword[2]) &&                \
        (keyword_length <= 3 || input[3] == keyword[3]) &&                \
        (keyword_length <= 4 || input[4] == keyword[4]) &&                \
        (keyword_length <= 5 || input[5] == keyword[5]) &&                \
        (keyword_length <= 6 || input[6] == keyword[6]) &&                \
        (keyword_length <= 7 || input[7] == keyword[7]) &&                \
        (keyword_length <= 8 || input[8] == keyword[8]) &&                \
        (keyword_length <= 9 || input[9] == keyword[9]) &&                \
        (keyword_length <= 10 || input[10] == keyword[10]) &&             \
        (keyword_length <= 11 || input[11] == keyword[11])) {             \
      return token;                                                       \
    }                                                                     \
  }
      KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD, ZWKEYWORD)
  }
  return Token::IDENTIFIER;
// KEYWORDS is deliberately left defined: ZWKeywordOrIdentifierToken expands
// it again.
//#undef KEYWORDS
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
#undef ZWKEYWORD
}

// True when wchar_t is as wide as a UTF-16 code unit. It is no longer needed
// by ZWKeywordOrIdentifierToken and is kept only so that any other code
// referring to it keeps building.
#define IS_WCHAR_EQ_U16() (sizeof(wchar_t) == sizeof(uint16_t))

// Classifies an identifier held as two-byte code units.
//
// Returns the token of the Chinese keyword (a ZWKEYWORD entry of KEYWORDS)
// that `input` spells, or Token::IDENTIFIER when it spells none. The ASCII
// KEYWORD entries expand to nothing here.
//
// input         first code unit of the identifier
// input_length  number of code units in the identifier
static Token::Value ZWKeywordOrIdentifierToken(const uint16_t* input,
                                               int input_length) {
#define ZWKEYWORD(keyword, token) .Case(L##keyword, token)
#define KEYWORD_GROUP_CASE(ch)
#define KEYWORD(keyword, token)
  // The iterator-range constructor converts each code unit to wchar_t on its
  // own, so it is correct whether wchar_t is 16 or 32 bits wide and never
  // reads uint16_t storage through a wchar_t pointer.
  return StringSwitch<Token::Value>(std::wstring(input, input + input_length))
      KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD, ZWKEYWORD)
      .Default(Token::IDENTIFIER);
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
#undef ZWKEYWORD
// This is the last expansion of the table, so KEYWORDS is released here.
#undef KEYWORDS
}
  4. 在 ScanIdentifierOrKeywordInner 或 ScanIdentifierOrKeyword 中添加识别中文关键字并返回对应 token 的逻辑。

node.js的稳定版,改点在ScanIdentifierOrKeyword中:

  // One-byte literal: look it up among the ASCII keywords.
  if (next_.literal_chars->is_one_byte()) {
    Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
    Token::Value token =
        KeywordOrIdentifierToken(chars.start(), chars.length());
    // The literal is completed only for identifiers, future strict reserved
    // words and contextual keywords.
    if (token == Token::IDENTIFIER ||
        token == Token::FUTURE_STRICT_RESERVED_WORD ||
        Token::IsContextualKeyword(token))
      literal.Complete();
    return token;
  }  else {
    // Added branch: a two-byte literal may spell a Chinese keyword, so look
    // it up with ZWKeywordOrIdentifierToken and then treat the resulting
    // token exactly like the one-byte branch above does.
    Vector<const uint16_t> chars = next_.literal_chars->two_byte_literal();
    Token::Value token =
        ZWKeywordOrIdentifierToken(chars.start(), chars.length());
    if (token == Token::IDENTIFIER ||
        token == Token::FUTURE_STRICT_RESERVED_WORD ||
        Token::IsContextualKeyword(token))
      literal.Complete();
    return token;
  }

node.js在github上的master,改点在ScanIdentifierOrKeywordInner中:

// One-byte literal: look it up among the ASCII keywords.
if (next_.literal_chars->is_one_byte()) {
    Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
    Token::Value token =
        KeywordOrIdentifierToken(chars.start(), chars.length());
    /* TODO(adamk): YIELD should be handled specially. */
    if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
      literal->Complete();
      if (escaped) return Token::ESCAPED_STRICT_RESERVED_WORD;
      return token;
    }
    if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
      literal->Complete();
      return token;
    }

    // Any other keyword is returned as-is unless it was written with an escape.
    if (!escaped) return token;

    literal->Complete();
    if (token == Token::LET || token == Token::STATIC) {
      return Token::ESCAPED_STRICT_RESERVED_WORD;
    }
    return Token::ESCAPED_KEYWORD;
  } else {
    // Added branch: a two-byte literal may spell a Chinese keyword.
    // two_byte_literal() is consumed as 16-bit code units, matching the
    // `const uint16_t*` parameter of ZWKeywordOrIdentifierToken.
    Vector<const uint16_t> chars = next_.literal_chars->two_byte_literal();
    Token::Value token =
        ZWKeywordOrIdentifierToken(chars.start(), chars.length());
    /* TODO(adamk): YIELD should be handled specially. */
    if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
      literal->Complete();
      if (escaped) return Token::ESCAPED_STRICT_RESERVED_WORD;
      return token;
    }
    if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
      literal->Complete();
      return token;
    }

    // Any other keyword is returned as-is unless it was written with an escape.
    if (!escaped) return token;

    literal->Complete();
    if (token == Token::LET || token == Token::STATIC) {
      return Token::ESCAPED_STRICT_RESERVED_WORD;
    }
    return Token::ESCAPED_KEYWORD;
  }

二. 修改node.js的 js库,对内置对象和函数添加中文名称

修改实例:

添加 “控制台.日志” 等效 “console.log”

1.添加 “console” 中文名 “控制台”.

lib\internal\bootstrap\node.js // github master

lib\internal\bootstrap_node.js // web download

    // Expose the wrapped console on the global object under its English name
    // and under the Chinese alias “控制台”; both getters return the same
    // wrappedConsole.
    for (const globalName of ['console', '控制台']) {
      Object.defineProperty(global, globalName, {
        configurable: true,
        enumerable: true,
        get() {
          return wrappedConsole;
        }
      });
    }
  2. 添加 “log” 函数中文名 “日志”

lib\console.js:

// Every name below is an alias of Console.prototype.log. “日志” is the
// Chinese alias, added in the same way as the existing `debug` and `info`.
for (const aliasName of ['debug', '日志', 'info']) {
  Console.prototype[aliasName] = Console.prototype.log;
}

三. 编译

./configure
make all

四. 运行实例:

hello.js //放在了 out/Release 中

// Demo script for the modified node build.
// 函数 is the Chinese keyword for `function`; 控制台.日志 is the Chinese
// alias of console.log.
函数 说1(词) {
    控制台.日志(词);
}
// Calls 某函数 with 值 as its only argument.
函数 执行(某函数, 值) {
    某函数(值);
}
// 变 is the Chinese keyword for `var`.
变 你 = "你";
变 哈哈 = {
    哈1: '好',
     哈2: 12
}

// Prints: 你
执行(说1, 你);
// Prints: { '哈1': '好', '哈2': 12 }
控制台.日志(哈哈);

运行:

cd out/Release
./node hello.js

结果:

你
{ '哈1': '好', '哈2': 12 }