---
layout: post
comments: true
title: 修改V8和node.js建立中文后端开发工具
description: 修改V8编译器源码, 实现关键词的中文化. Modify source code of V8 JavaScript compiler to support keywords in Chinese.
date: 2018-09-19 00:00:00 -0700
---
本文github: https://github.com/swizl/zwnode
一. 修改V8
V8在一个文件里改就行了。
\deps\v8\src\parsing\scanner.cc
/// A chained-call "switch" over wide-string literals.
///
/// Usage:
///   R r = StringSwitch<T>(str).Case(L"a", v1).Case(L"b", v2).Default(v0);
///
/// The first Case() whose literal equals the subject string wins; later
/// matches are ignored.
///
/// Lifetime: Case() stores the ADDRESS of its `Value` argument, not a copy.
/// The whole chain, including the final Default() or conversion to R, must
/// therefore be evaluated inside one full-expression, or every `Value` must
/// outlive the switch object.
///
/// \tparam T type of the values passed to Case()/Default().
/// \tparam R type produced by Default() and by the conversion operator.
template<typename T, typename R = T>
class StringSwitch {
  /// The string being matched.
  std::wstring Str;
  /// Address of the value of the first matching Case(); nullptr until a
  /// case matches.
  const T *Result = nullptr;

public:
  /// Takes the subject string by value and moves it into place, so passing
  /// a temporary costs no copy.
  explicit StringSwitch(std::wstring S)
      : Str(std::move(S)) {}

  // StringSwitch is not copyable.
  StringSwitch(const StringSwitch &) = delete;
  void operator=(const StringSwitch &) = delete;

  /// Move construction transfers the subject string and any match found so
  /// far.
  StringSwitch(StringSwitch &&other) noexcept
      : Str(std::move(other.Str)), Result(other.Result) {}

  StringSwitch &operator=(StringSwitch &&other) noexcept {
    // Guard against self-move: moving a string onto itself would leave it in
    // an unspecified state.
    if (this != &other) {
      Str = std::move(other.Str);
      Result = other.Result;
    }
    return *this;
  }

  ~StringSwitch() = default;

  /// Case-sensitive matcher.
  ///
  /// \param S     wide string literal; N counts its terminating NUL.
  /// \param Value value selected when S equals the subject string. Only its
  ///              address is kept (see the class comment on lifetime).
  /// \returns *this, so calls can be chained.
  template<unsigned N>
  StringSwitch &Case(const wchar_t (&S)[N], const T &Value) {
    static_assert(N > 0, "a string literal always holds at least the NUL");
    // Compare exactly N-1 characters rather than treating S as a C string,
    // so a literal with an embedded NUL is matched over its full length.
    if (!Result && N - 1 == Str.size() &&
        Str.compare(0, N - 1, S, N - 1) == 0) {
      Result = &Value;
    }
    return *this;
  }

  /// Ends the chain.
  /// \returns the matched value, or `Value` when no case matched.
  R Default(const T &Value) const {
    if (Result)
      return *Result;
    return Value;
  }

  /// Ends the chain without a default; asserts that some case matched.
  operator R() const {
    assert(Result && "Fell off the end of a string-switch");
    return *Result;
  }
};
// X-macro table of the keywords recognized by the scanner.
//
// KEYWORDS(KEYWORD_GROUP, KEYWORD, ZWKEYWORD) expands, in the order listed
// below, to one invocation per entry:
//   KEYWORD_GROUP(ch)       - opens the group of ASCII keywords whose first
//                             character is `ch`;
//   KEYWORD(text, token)    - an ASCII keyword and the token it scans to;
//   ZWKEYWORD(text, token)  - the Chinese spelling that scans to the same
//                             token as the KEYWORD entry directly above it.
// Each user defines the macro(s) it needs and defines the others as empty:
// KeywordOrIdentifierToken consumes KEYWORD_GROUP and KEYWORD,
// ZWKeywordOrIdentifierToken consumes only ZWKEYWORD.
#define KEYWORDS(KEYWORD_GROUP, KEYWORD, ZWKEYWORD) \
KEYWORD_GROUP('a') \
KEYWORD("arguments", Token::ARGUMENTS) \
ZWKEYWORD("诸参", Token::ARGUMENTS) \
KEYWORD("as", Token::AS) \
ZWKEYWORD("以", Token::AS) \
KEYWORD("async", Token::ASYNC) \
ZWKEYWORD("异", Token::ASYNC) \
KEYWORD("await", Token::AWAIT) \
ZWKEYWORD("等", Token::AWAIT) \
KEYWORD("anonymous", Token::ANONYMOUS) \
ZWKEYWORD("匿", Token::ANONYMOUS) \
KEYWORD_GROUP('b') \
KEYWORD("break", Token::BREAK) \
ZWKEYWORD("破", Token::BREAK) \
KEYWORD_GROUP('c') \
KEYWORD("case", Token::CASE) \
ZWKEYWORD("例", Token::CASE) \
KEYWORD("catch", Token::CATCH) \
ZWKEYWORD("捕", Token::CATCH) \
KEYWORD("class", Token::CLASS) \
ZWKEYWORD("类", Token::CLASS) \
KEYWORD("const", Token::CONST) \
ZWKEYWORD("常", Token::CONST) \
KEYWORD("constructor", Token::CONSTRUCTOR) \
ZWKEYWORD("构造器", Token::CONSTRUCTOR) \
KEYWORD("continue", Token::CONTINUE) \
ZWKEYWORD("继", Token::CONTINUE) \
KEYWORD_GROUP('d') \
KEYWORD("debugger", Token::DEBUGGER) \
ZWKEYWORD("调试器", Token::DEBUGGER) \
KEYWORD("default", Token::DEFAULT) \
ZWKEYWORD("默", Token::DEFAULT) \
KEYWORD("delete", Token::DELETE) \
ZWKEYWORD("删", Token::DELETE) \
KEYWORD("do", Token::DO) \
ZWKEYWORD("行", Token::DO) \
KEYWORD_GROUP('e') \
KEYWORD("else", Token::ELSE) \
ZWKEYWORD("另", Token::ELSE) \
KEYWORD("enum", Token::ENUM) \
ZWKEYWORD("举", Token::ENUM) \
KEYWORD("eval", Token::EVAL) \
ZWKEYWORD("估", Token::EVAL) \
KEYWORD("export", Token::EXPORT) \
ZWKEYWORD("导", Token::EXPORT) \
KEYWORD("extends", Token::EXTENDS) \
ZWKEYWORD("承", Token::EXTENDS) \
KEYWORD_GROUP('f') \
KEYWORD("false", Token::FALSE_LITERAL) \
ZWKEYWORD("假", Token::FALSE_LITERAL) \
KEYWORD("finally", Token::FINALLY) \
ZWKEYWORD("末", Token::FINALLY) \
KEYWORD("for", Token::FOR) \
ZWKEYWORD("于", Token::FOR) \
KEYWORD("from", Token::FROM) \
ZWKEYWORD("从", Token::FROM) \
KEYWORD("function", Token::FUNCTION) \
ZWKEYWORD("函数", Token::FUNCTION) \
KEYWORD_GROUP('g') \
KEYWORD("get", Token::GET) \
ZWKEYWORD("取", Token::GET) \
KEYWORD_GROUP('i') \
KEYWORD("if", Token::IF) \
ZWKEYWORD("如", Token::IF) \
KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
ZWKEYWORD("成", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("import", Token::IMPORT) \
ZWKEYWORD("引", Token::IMPORT) \
KEYWORD("in", Token::IN) \
ZWKEYWORD("在", Token::IN) \
KEYWORD("instanceof", Token::INSTANCEOF) \
ZWKEYWORD("是为", Token::INSTANCEOF) \
KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
ZWKEYWORD("接", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('l') \
KEYWORD("let", Token::LET) \
ZWKEYWORD("让", Token::LET) \
KEYWORD_GROUP('m') \
KEYWORD("meta", Token::META) \
ZWKEYWORD("元", Token::META) \
KEYWORD_GROUP('n') \
KEYWORD("name", Token::NAME) \
ZWKEYWORD("名", Token::NAME) \
KEYWORD("new", Token::NEW) \
ZWKEYWORD("新", Token::NEW) \
KEYWORD("null", Token::NULL_LITERAL) \
ZWKEYWORD("无", Token::NULL_LITERAL) \
KEYWORD_GROUP('o') \
KEYWORD("of", Token::OF) \
ZWKEYWORD("之", Token::OF) \
KEYWORD_GROUP('p') \
KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
ZWKEYWORD("包", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
ZWKEYWORD("私", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
ZWKEYWORD("保", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("prototype", Token::PROTOTYPE) \
ZWKEYWORD("原型", Token::PROTOTYPE) \
KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
ZWKEYWORD("公", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('r') \
KEYWORD("return", Token::RETURN) \
ZWKEYWORD("返", Token::RETURN) \
KEYWORD_GROUP('s') \
KEYWORD("set", Token::SET) \
ZWKEYWORD("设", Token::SET) \
KEYWORD("static", Token::STATIC) \
ZWKEYWORD("固", Token::STATIC) \
KEYWORD("super", Token::SUPER) \
ZWKEYWORD("超", Token::SUPER) \
KEYWORD("switch", Token::SWITCH) \
ZWKEYWORD("切", Token::SWITCH) \
KEYWORD_GROUP('t') \
KEYWORD("target", Token::TARGET) \
ZWKEYWORD("标", Token::TARGET) \
KEYWORD("this", Token::THIS) \
ZWKEYWORD("此", Token::THIS) \
KEYWORD("throw", Token::THROW) \
ZWKEYWORD("抛", Token::THROW) \
KEYWORD("true", Token::TRUE_LITERAL) \
ZWKEYWORD("真", Token::TRUE_LITERAL) \
KEYWORD("try", Token::TRY) \
ZWKEYWORD("试", Token::TRY) \
KEYWORD("typeof", Token::TYPEOF) \
ZWKEYWORD("之型", Token::TYPEOF) \
KEYWORD_GROUP('u') \
KEYWORD("undefined", Token::UNDEFINED) \
ZWKEYWORD("未定", Token::UNDEFINED) \
KEYWORD_GROUP('v') \
KEYWORD("var", Token::VAR) \
ZWKEYWORD("变", Token::VAR) \
KEYWORD("void", Token::VOID) \
ZWKEYWORD("空", Token::VOID) \
KEYWORD_GROUP('w') \
KEYWORD("while", Token::WHILE) \
ZWKEYWORD("当", Token::WHILE) \
KEYWORD("with", Token::WITH) \
ZWKEYWORD("为之", Token::WITH) \
KEYWORD_GROUP('y') \
KEYWORD("yield", Token::YIELD) \
ZWKEYWORD("降", Token::YIELD) \
KEYWORD_GROUP('_') \
KEYWORD("__proto__", Token::PROTO_UNDERSCORED) \
ZWKEYWORD("__原__", Token::PROTO_UNDERSCORED) \
KEYWORD_GROUP('#') \
KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR) \
ZWKEYWORD("#构造器", Token::PRIVATE_CONSTRUCTOR)
3.仿照KeywordOrIdentifierToken写一个ZWKeywordOrIdentifierToken
// Classifies a one-byte identifier as an ASCII keyword or a plain identifier.
//
// input         first character of the scanned identifier (one byte per
//               character).
// input_length  number of characters in `input`; must be >= 1.
//
// Returns the token of the KEYWORD entry whose text equals the input exactly,
// or Token::IDENTIFIER when there is none.
//
// The body is generated from the KEYWORDS table: each KEYWORD_GROUP becomes a
// `case` on the first character and each KEYWORD an unrolled comparison of the
// remaining characters. ZWKEYWORD entries expand to nothing here; they are
// matched by ZWKeywordOrIdentifierToken instead.
static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
                                             int input_length) {
  DCHECK_GE(input_length, 1);
  const int kMinLength = 2;
  const int kMaxLength = 12;
  // The unrolled comparison in KEYWORD checks indices 1..11; it must be
  // extended whenever kMaxLength grows.
  STATIC_ASSERT(kMaxLength <= 12);
  if (input_length < kMinLength || input_length > kMaxLength) {
    return Token::IDENTIFIER;
  }
  switch (input[0]) {
    default:
#define ZWKEYWORD(keyword, token)
#define KEYWORD_GROUP_CASE(ch) \
  break;                       \
  case ch:
#define KEYWORD(keyword, token)                                           \
  {                                                                       \
    /* 'keyword' is a char array, so sizeof(keyword) is */                \
    /* strlen(keyword) plus 1 for the NUL char. */                        \
    const int keyword_length = sizeof(keyword) - 1;                       \
    STATIC_ASSERT(keyword_length >= kMinLength);                          \
    STATIC_ASSERT(keyword_length <= kMaxLength);                          \
    DCHECK_EQ(input[0], keyword[0]);                                      \
    DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD ||                 \
           0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
    /* Index 11 must be compared too: "#constructor" is 12 characters */  \
    /* long, and without this check any 12-character input sharing   */  \
    /* its first 11 characters would scan as PRIVATE_CONSTRUCTOR.    */  \
    if (input_length == keyword_length && input[1] == keyword[1] &&       \
        (keyword_length <= 2 || input[2] == keyword[2]) &&                \
        (keyword_length <= 3 || input[3] == keyword[3]) &&                \
        (keyword_length <= 4 || input[4] == keyword[4]) &&                \
        (keyword_length <= 5 || input[5] == keyword[5]) &&                \
        (keyword_length <= 6 || input[6] == keyword[6]) &&                \
        (keyword_length <= 7 || input[7] == keyword[7]) &&                \
        (keyword_length <= 8 || input[8] == keyword[8]) &&                \
        (keyword_length <= 9 || input[9] == keyword[9]) &&                \
        (keyword_length <= 10 || input[10] == keyword[10]) &&             \
        (keyword_length <= 11 || input[11] == keyword[11])) {             \
      return token;                                                       \
    }                                                                     \
  }
      KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD, ZWKEYWORD)
  }
  return Token::IDENTIFIER;
// KEYWORDS itself is deliberately left defined: ZWKeywordOrIdentifierToken
// expands it again and is the one that undefines it.
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
#undef ZWKEYWORD
}
// True when wchar_t and uint16_t have the same size. No longer needed by
// ZWKeywordOrIdentifierToken; kept so any other user of the macro still
// compiles.
#define IS_WCHAR_EQ_U16() (sizeof(wchar_t) == sizeof(uint16_t))

// Classifies a two-byte identifier as a Chinese keyword or a plain identifier.
//
// input         first UTF-16 code unit of the scanned identifier.
// input_length  number of code units in `input`.
//
// Returns the token of the ZWKEYWORD entry whose text equals the input
// exactly, or Token::IDENTIFIER when there is none. KEYWORD and KEYWORD_GROUP
// entries of the table expand to nothing here.
static Token::Value ZWKeywordOrIdentifierToken(const uint16_t* input,
                                               int input_length) {
#define ZWKEYWORD(keyword, token) .Case(L##keyword, token)
#define KEYWORD_GROUP_CASE(ch)
#define KEYWORD(keyword, token)
  // Build the wide string by converting each code unit individually. This is
  // valid for any wchar_t width and avoids reinterpreting the uint16_t buffer
  // as wchar_t through a pointer cast. Passing the temporary straight to
  // StringSwitch lets it be moved rather than copied.
  //
  // The whole chain stays in one expression because StringSwitch keeps only
  // the address of each token value passed to Case().
  return StringSwitch<Token::Value>(
             std::wstring(input, input + input_length))
      KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD, ZWKEYWORD)
      .Default(Token::IDENTIFIER);
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
#undef ZWKEYWORD
#undef KEYWORDS
}
node.js的稳定版,改点在ScanIdentifierOrKeyword中:
if (next_.literal_chars->is_one_byte()) {
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER ||
token == Token::FUTURE_STRICT_RESERVED_WORD ||
Token::IsContextualKeyword(token))
literal.Complete();
return token;
} else {
Vector<const uint16_t> chars = next_.literal_chars->two_byte_literal();
Token::Value token =
ZWKeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER ||
token == Token::FUTURE_STRICT_RESERVED_WORD ||
Token::IsContextualKeyword(token))
literal.Complete();
return token;
}
node.js在github上的master,改点在ScanIdentifierOrKeywordInner中:
if (next_.literal_chars->is_one_byte()) {
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
/* TODO(adamk): YIELD should be handled specially. */
if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
literal->Complete();
if (escaped) return Token::ESCAPED_STRICT_RESERVED_WORD;
return token;
}
if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
literal->Complete();
return token;
}
if (!escaped) return token;
literal->Complete();
if (token == Token::LET || token == Token::STATIC) {
return Token::ESCAPED_STRICT_RESERVED_WORD;
}
return Token::ESCAPED_KEYWORD;
} else {
Vector<const uint8_t> chars = next_.literal_chars->two_byte_literal();
Token::Value token =
ZWKeywordOrIdentifierToken(chars.start(), chars.length());
/* TODO(adamk): YIELD should be handled specially. */
if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
literal->Complete();
if (escaped) return Token::ESCAPED_STRICT_RESERVED_WORD;
return token;
}
if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
literal->Complete();
return token;
}
if (!escaped) return token;
literal->Complete();
if (token == Token::LET || token == Token::STATIC) {
return Token::ESCAPED_STRICT_RESERVED_WORD;
}
return Token::ESCAPED_KEYWORD;
}
二. 修改node.js的 js库,对内置对象和函数添加中文名称
修改实例:
添加 “控制台.日志” 等效 “console.log”
1.添加 “console” 中文名 “控制台”.
lib\internal\bootstrap\node.js // github master
lib\internal\bootstrap_node.js // web download
// Expose the wrapped console on the global object under its English name and
// under the Chinese alias "控制台". Both are configurable, enumerable
// accessors that return the same wrappedConsole.
for (const globalName of ['console', '控制台']) {
  Object.defineProperty(global, globalName, {
    configurable: true,
    enumerable: true,
    get() {
      return wrappedConsole;
    }
  });
}
lib\console.js:
// `debug` is an alias of `log`.
Console.prototype.debug = Console.prototype.log;
// Following the pattern of debug and info, add "日志" as an alias of "log".
Console.prototype.日志 = Console.prototype.log;
// `info` is an alias of `log`.
Console.prototype.info = Console.prototype.log;
三. 编译
./configure
make all
四. 运行实例:
hello.js //放在了 out/Release 中
// Sample program written with the Chinese keywords added to the scanner:
// 函数 = function, 变 = var, 控制台.日志 = console.log.

// Prints its argument.
函数 说1(词) {
控制台.日志(词);
}
// Calls the given function with the given value.
函数 执行(某函数, 值) {
某函数(值);
}
变 你 = "你";
// Object with one string property and one numeric property.
变 哈哈 = {
哈1: '好',
哈2: 12
}
// Prints the string through the two helpers, then prints the object directly.
执行(说1, 你);
控制台.日志(哈哈);
运行:
cd out/Release
./node hello.js
结果:
你
{ '哈1': '好', '哈2': 12 }